// blob: 550e19fa7c6ed886ab4e98fb4d2d26c1cf668aee [file] [log] [blame]
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "media/cast/sender/audio_encoder.h"

#include <stdint.h>
#include <string.h>

#include <algorithm>
#include <limits>
#include <memory>
#include <string>
#include <utility>

#include "base/bind.h"
#include "base/callback_helpers.h"
#include "base/location.h"
#include "base/logging.h"
#include "base/macros.h"
#include "base/sys_byteorder.h"
#include "base/time/time.h"
#include "base/trace_event/trace_event.h"
#include "build/build_config.h"
#include "media/base/audio_sample_types.h"
#include "media/cast/common/rtp_time.h"
#include "media/cast/constants.h"

#if !defined(OS_IOS)
#include "third_party/opus/src/include/opus.h"
#endif

#if defined(OS_MAC)
#include <AudioToolbox/AudioToolbox.h>
#endif
namespace media {
namespace cast {
namespace {

// When the capture clock runs ahead of the buffered audio by more than this
// many whole frames, the encoder treats it as an input underrun and skips the
// RTP timestamp ahead.
constexpr int kUnderrunSkipThreshold = 3;

// Frame rate used to derive the samples-per-frame count for the codecs that
// use 10 ms frames (sampling rate divided by this value).
constexpr int kDefaultFramesPerSecond = 100;

}  // namespace
// Base class that handles the common problem of feeding one or more AudioBus'
// data into a buffer and then, once the buffer is full, encoding the signal and
// emitting a SenderEncodedFrame via the FrameEncodedCallback.
//
// Subclasses complete the implementation by handling the actual encoding
// details.
class AudioEncoder::ImplBase
    : public base::RefCountedThreadSafe<AudioEncoder::ImplBase> {
 public:
  // Records the stream parameters and validates them. On an invalid
  // combination |operational_status_| becomes STATUS_INVALID_CONFIGURATION;
  // otherwise it is left at STATUS_UNINITIALIZED for the subclass constructor
  // to finish initialization.
  ImplBase(const scoped_refptr<CastEnvironment>& cast_environment,
           Codec codec,
           int num_channels,
           int sampling_rate,
           int samples_per_frame,
           FrameEncodedCallback callback)
      : cast_environment_(cast_environment),
        codec_(codec),
        num_channels_(num_channels),
        samples_per_frame_(samples_per_frame),
        callback_(std::move(callback)),
        operational_status_(STATUS_UNINITIALIZED),
        // Never divide by a non-positive sampling rate; a zero duration is
        // rejected by the validation below.
        frame_duration_(
            sampling_rate > 0
                ? base::Seconds(static_cast<double>(samples_per_frame_) /
                                sampling_rate)
                : base::TimeDelta()),
        buffer_fill_end_(0),
        frame_id_(FrameId::first()),
        samples_dropped_from_buffer_(0) {
    // Support for max sampling rate of 48KHz, 2 channels, 100 ms duration.
    const int kMaxSamplesTimesChannelsPerFrame = 48 * 2 * 100;
    // The size limit is checked with a division rather than by multiplying the
    // two counts, so that very large inputs cannot overflow |int|. The
    // division is only reached once |samples_per_frame_| is known to be
    // positive.
    if (num_channels_ <= 0 || samples_per_frame_ <= 0 || sampling_rate <= 0 ||
        frame_duration_.is_zero() ||
        num_channels_ >
            kMaxSamplesTimesChannelsPerFrame / samples_per_frame_) {
      operational_status_ = STATUS_INVALID_CONFIGURATION;
    }
  }

  // Returns the status left behind by the base and subclass constructors.
  OperationalStatus InitializationResult() const {
    return operational_status_;
  }

  // Number of samples (per channel) in each encoded frame.
  int samples_per_frame() const {
    return samples_per_frame_;
  }

  // Duration of the signal represented by one encoded frame.
  base::TimeDelta frame_duration() const { return frame_duration_; }

  // Appends the samples in |audio_bus| to the internal buffer. Each time the
  // buffer holds a full frame, the frame is encoded and, if the encoder
  // produced output, posted to |callback_| on the MAIN thread.
  // |recorded_time| must be non-null and the encoder must be initialized.
  void EncodeAudio(std::unique_ptr<AudioBus> audio_bus,
                   const base::TimeTicks& recorded_time) {
    DCHECK_EQ(operational_status_, STATUS_INITIALIZED);
    DCHECK(!recorded_time.is_null());

    // Determine whether |recorded_time| is consistent with the amount of audio
    // data having been processed in the past.  Resolve the underrun problem by
    // dropping data from the internal buffer and skipping ahead the next
    // frame's RTP timestamp by the estimated number of frames missed.  On the
    // other hand, don't attempt to resolve overruns: A receiver should
    // gracefully deal with an excess of audio data.
    base::TimeDelta buffer_fill_duration =
        buffer_fill_end_ * frame_duration_ / samples_per_frame_;
    if (!frame_capture_time_.is_null()) {
      const base::TimeDelta amount_ahead_by =
          recorded_time - (frame_capture_time_ + buffer_fill_duration);
      const int64_t num_frames_missed = amount_ahead_by.IntDiv(frame_duration_);
      if (num_frames_missed > kUnderrunSkipThreshold) {
        samples_dropped_from_buffer_ += buffer_fill_end_;
        buffer_fill_end_ = 0;
        buffer_fill_duration = base::TimeDelta();
        frame_rtp_timestamp_ +=
            RtpTimeDelta::FromTicks(num_frames_missed * samples_per_frame_);
        DVLOG(1) << "Skipping RTP timestamp ahead to account for "
                 << num_frames_missed * samples_per_frame_
                 << " samples' worth of underrun.";
        TRACE_EVENT_INSTANT2("cast.stream", "Audio Skip",
                             TRACE_EVENT_SCOPE_THREAD,
                             "frames missed", num_frames_missed,
                             "samples dropped", samples_dropped_from_buffer_);
      }
    }
    frame_capture_time_ = recorded_time - buffer_fill_duration;

    // Encode all audio in |audio_bus| into zero or more frames.
    int src_pos = 0;
    while (src_pos < audio_bus->frames()) {
      // Note: This is used to compute the encoder utilization and so it uses
      // the real-world clock instead of the CastEnvironment clock, the latter
      // of which might be simulated.
      const base::TimeTicks start_time = base::TimeTicks::Now();

      const int num_samples_to_xfer = std::min(
          samples_per_frame_ - buffer_fill_end_, audio_bus->frames() - src_pos);
      DCHECK_EQ(audio_bus->channels(), num_channels_);
      TransferSamplesIntoBuffer(
          audio_bus.get(), src_pos, buffer_fill_end_, num_samples_to_xfer);
      src_pos += num_samples_to_xfer;
      buffer_fill_end_ += num_samples_to_xfer;

      // Not enough data for a whole frame yet; wait for the next call.
      if (buffer_fill_end_ < samples_per_frame_)
        break;

      auto audio_frame = std::make_unique<SenderEncodedFrame>();
      audio_frame->dependency = EncodedFrame::KEY;
      audio_frame->frame_id = frame_id_;
      audio_frame->referenced_frame_id = frame_id_;
      audio_frame->rtp_timestamp = frame_rtp_timestamp_;
      audio_frame->reference_time = frame_capture_time_;

      TRACE_EVENT_NESTABLE_ASYNC_BEGIN2(
          "cast.stream", "Audio Encode", TRACE_ID_LOCAL(audio_frame.get()),
          "frame_id", frame_id_.lower_32_bits(), "rtp_timestamp",
          frame_rtp_timestamp_.lower_32_bits());
      if (EncodeFromFilledBuffer(&audio_frame->data)) {
        // Compute encoder utilization as the real-world time elapsed divided
        // by the signal duration.
        audio_frame->encoder_utilization =
            (base::TimeTicks::Now() - start_time) / frame_duration_;

        TRACE_EVENT_NESTABLE_ASYNC_END1(
            "cast.stream", "Audio Encode", TRACE_ID_LOCAL(audio_frame.get()),
            "encoder_utilization", audio_frame->encoder_utilization);

        audio_frame->encode_completion_time =
            cast_environment_->Clock()->NowTicks();
        cast_environment_->PostTask(
            CastEnvironment::MAIN, FROM_HERE,
            base::BindOnce(callback_, std::move(audio_frame),
                           samples_dropped_from_buffer_));
        samples_dropped_from_buffer_ = 0;
      } else {
        // No frame is emitted, but the trace event opened above still has to
        // be closed so that it does not appear as a never-ending slice.
        TRACE_EVENT_NESTABLE_ASYNC_END0("cast.stream", "Audio Encode",
                                        TRACE_ID_LOCAL(audio_frame.get()));
      }

      // Reset the internal buffer, frame ID, and timestamps for the next frame.
      buffer_fill_end_ = 0;
      ++frame_id_;
      frame_rtp_timestamp_ += RtpTimeDelta::FromTicks(samples_per_frame_);
      frame_capture_time_ += frame_duration_;
    }
  }

 protected:
  friend class base::RefCountedThreadSafe<ImplBase>;
  virtual ~ImplBase() = default;

  // Copies |num_samples| samples, starting at |source_offset| in |audio_bus|,
  // into the subclass' frame buffer at sample position |buffer_fill_offset|.
  virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
                                         int source_offset,
                                         int buffer_fill_offset,
                                         int num_samples) = 0;

  // Encodes the full frame buffer into |out|. Returns false if no frame
  // should be emitted.
  virtual bool EncodeFromFilledBuffer(std::string* out) = 0;

  const scoped_refptr<CastEnvironment> cast_environment_;
  const Codec codec_;
  const int num_channels_;
  const int samples_per_frame_;
  const FrameEncodedCallback callback_;

  // Subclass' ctor is expected to set this to STATUS_INITIALIZED.
  OperationalStatus operational_status_;

  // The duration of one frame of encoded audio samples.  Derived from
  // |samples_per_frame_| and the sampling rate.
  const base::TimeDelta frame_duration_;

 private:
  // In the case where a call to EncodeAudio() cannot completely fill the
  // buffer, this points to the position at which to populate data in a later
  // call.
  int buffer_fill_end_;

  // A counter used to label EncodedFrames.
  FrameId frame_id_;

  // The RTP timestamp for the next frame of encoded audio.  This is defined as
  // the number of audio samples encoded so far, plus the estimated number of
  // samples that were missed due to data underruns.  A receiver uses this value
  // to detect gaps in the audio signal data being provided.
  RtpTimeTicks frame_rtp_timestamp_;

  // The local system time associated with the start of the next frame of
  // encoded audio.  This value is passed on to a receiver as a reference clock
  // timestamp for the purposes of synchronizing audio and video.  Its
  // progression is expected to drift relative to the elapsed time implied by
  // the RTP timestamps.
  base::TimeTicks frame_capture_time_;

  // Set to non-zero to indicate the next output frame skipped over audio
  // samples in order to recover from an input underrun.
  int samples_dropped_from_buffer_;

  DISALLOW_COPY_AND_ASSIGN(ImplBase);
};
#if !defined(OS_IOS)
// Encodes audio with the Opus codec, using 10 ms frames.
class AudioEncoder::OpusImpl final : public AudioEncoder::ImplBase {
 public:
  // Creates and configures the Opus encoder. A |bitrate| of zero or less
  // selects the encoder's automatic bitrate mode. If the configuration is not
  // usable, the status is set to STATUS_INVALID_CONFIGURATION.
  OpusImpl(const scoped_refptr<CastEnvironment>& cast_environment,
           int num_channels,
           int sampling_rate,
           int bitrate,
           FrameEncodedCallback callback)
      : ImplBase(cast_environment,
                 CODEC_AUDIO_OPUS,
                 num_channels,
                 sampling_rate,
                 sampling_rate / kDefaultFramesPerSecond, /* 10 ms frames */
                 std::move(callback)),
        encoder_memory_(new uint8_t[opus_encoder_get_size(num_channels)]),
        opus_encoder_(reinterpret_cast<OpusEncoder*>(encoder_memory_.get())),
        // Only size the sample buffer from the stream parameters once the base
        // class has accepted them: a rejected configuration may have a
        // non-positive channel or sample count, which is not a valid array
        // length.
        buffer_(new float[ImplBase::operational_status_ == STATUS_UNINITIALIZED
                              ? num_channels * samples_per_frame_
                              : 0]) {
    // The base class already rejected the configuration.
    if (ImplBase::operational_status_ != STATUS_UNINITIALIZED)
      return;

    // Report unsupported sampling rates and frame durations as an invalid
    // configuration rather than leaving the status at STATUS_UNINITIALIZED.
    if (sampling_rate % samples_per_frame_ != 0 ||
        !IsValidFrameDuration(frame_duration_)) {
      ImplBase::operational_status_ = STATUS_INVALID_CONFIGURATION;
      return;
    }

    if (opus_encoder_init(opus_encoder_,
                          sampling_rate,
                          num_channels,
                          OPUS_APPLICATION_AUDIO) != OPUS_OK) {
      ImplBase::operational_status_ = STATUS_INVALID_CONFIGURATION;
      return;
    }
    ImplBase::operational_status_ = STATUS_INITIALIZED;

    if (bitrate <= 0) {
      // Note: As of 2013-10-31, the encoder in "auto bitrate" mode would use a
      // variable bitrate up to 102kbps for 2-channel, 48 kHz audio and a 10 ms
      // frame size.  The opus library authors may, of course, adjust this in
      // later versions.
      bitrate = OPUS_AUTO;
    }
    CHECK_EQ(opus_encoder_ctl(opus_encoder_, OPUS_SET_BITRATE(bitrate)),
             OPUS_OK);
  }

 private:
  ~OpusImpl() final = default;

  // Interleaves |num_samples| float samples from |audio_bus| into |buffer_|,
  // starting at sample position |buffer_fill_offset|.
  void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
                                 int source_offset,
                                 int buffer_fill_offset,
                                 int num_samples) final {
    DCHECK_EQ(audio_bus->channels(), num_channels_);
    float* dest = buffer_.get() + (buffer_fill_offset * num_channels_);
    audio_bus->ToInterleavedPartial<Float32SampleTypeTraits>(source_offset,
                                                             num_samples, dest);
  }

  // Encodes the contents of |buffer_| into |out|. Returns true only when the
  // encoder produced a packet of more than one byte.
  bool EncodeFromFilledBuffer(std::string* out) final {
    out->resize(kOpusMaxPayloadSize);
    const opus_int32 result = opus_encode_float(
        opus_encoder_, buffer_.get(), samples_per_frame_,
        reinterpret_cast<uint8_t*>(base::data(*out)), kOpusMaxPayloadSize);
    if (result > 1) {
      out->resize(result);
      return true;
    } else if (result < 0) {
      LOG(ERROR) << "Error code from opus_encode_float(): " << result;
      return false;
    } else {
      // Do nothing: The documentation says that a return value of zero or
      // one byte means the packet does not need to be transmitted.
      return false;
    }
  }

  // Returns true if |duration| is one of the frame durations accepted here.
  static bool IsValidFrameDuration(base::TimeDelta duration) {
    // See https://tools.ietf.org/html/rfc6716#section-2.1.4
    return duration == base::Microseconds(2500) ||
           duration == base::Milliseconds(5) ||
           duration == base::Milliseconds(10) ||
           duration == base::Milliseconds(20) ||
           duration == base::Milliseconds(40) ||
           duration == base::Milliseconds(60);
  }

  // Backing storage for the encoder state that |opus_encoder_| points into.
  const std::unique_ptr<uint8_t[]> encoder_memory_;
  OpusEncoder* const opus_encoder_;

  // Holds one frame of interleaved float samples awaiting encoding.
  const std::unique_ptr<float[]> buffer_;

  // This is the recommended value, according to documentation in
  // third_party/opus/src/include/opus.h, so that the Opus encoder does not
  // degrade the audio due to memory constraints.
  //
  // Note: Whereas other RTP implementations do not, the cast library is
  // perfectly capable of transporting larger than MTU-sized audio frames.
  static const int kOpusMaxPayloadSize = 4000;

  DISALLOW_COPY_AND_ASSIGN(OpusImpl);
};
#endif
#if defined(OS_MAC)
// Encodes audio as AAC-LC using Apple's AudioToolbox converter, and wraps each
// access unit in an ADTS header using an AudioFile with custom callbacks.
class AudioEncoder::AppleAacImpl final : public AudioEncoder::ImplBase {
  // AAC-LC has two access unit sizes (960 and 1024). The Apple encoder only
  // supports the latter.
  static const int kAccessUnitSamples = 1024;

  // Size of an ADTS header (w/o checksum). See
  // http://wiki.multimedia.cx/index.php?title=ADTS
  static const int kAdtsHeaderSize = 7;

 public:
  // Creates the converter and file objects. If the base class rejected the
  // parameters, or Initialize() fails, the status is left at or set to
  // STATUS_INVALID_CONFIGURATION.
  AppleAacImpl(const scoped_refptr<CastEnvironment>& cast_environment,
               int num_channels,
               int sampling_rate,
               int bitrate,
               FrameEncodedCallback callback)
      : ImplBase(cast_environment,
                 CODEC_AUDIO_AAC,
                 num_channels,
                 sampling_rate,
                 kAccessUnitSamples,
                 std::move(callback)),
        input_buffer_(AudioBus::Create(num_channels, kAccessUnitSamples)),
        input_bus_(AudioBus::CreateWrapper(num_channels)),
        max_access_unit_size_(0),
        output_buffer_(nullptr),
        converter_(nullptr),
        file_(nullptr),
        num_access_units_(0) {
    if (ImplBase::operational_status_ != STATUS_UNINITIALIZED) {
      return;
    }
    if (!Initialize(sampling_rate, bitrate)) {
      ImplBase::operational_status_ = STATUS_INVALID_CONFIGURATION;
      return;
    }
    ImplBase::operational_status_ = STATUS_INITIALIZED;
  }

 private:
  ~AppleAacImpl() override { Teardown(); }

  // Destroys the existing audio converter and file, if any.
  void Teardown() {
    if (converter_) {
      AudioConverterDispose(converter_);
      converter_ = nullptr;
    }
    if (file_) {
      AudioFileClose(file_);
      file_ = nullptr;
    }
  }

  // Initializes the audio converter and file. Calls Teardown to destroy any
  // existing state. This is so that Initialize() may be called to setup another
  // converter after a non-resumable interruption. Returns false if any step
  // fails.
  bool Initialize(int sampling_rate, int bitrate) {
    // Teardown previous audio converter and file.
    Teardown();

    // Input data comes from AudioBus objects, which carry non-interleaved
    // packed native-endian float samples. Note that in Core Audio, a frame is
    // one sample across all channels at a given point in time. When describing
    // a non-interleaved samples format, the "per frame" fields mean "per
    // channel" or "per stream", with the exception of |mChannelsPerFrame|. For
    // uncompressed formats, one packet contains one frame.
    AudioStreamBasicDescription in_asbd;
    in_asbd.mSampleRate = sampling_rate;
    in_asbd.mFormatID = kAudioFormatLinearPCM;
    in_asbd.mFormatFlags =
        kAudioFormatFlagsNativeFloatPacked | kAudioFormatFlagIsNonInterleaved;
    in_asbd.mChannelsPerFrame = num_channels_;
    in_asbd.mBitsPerChannel = sizeof(float) * 8;
    in_asbd.mFramesPerPacket = 1;
    in_asbd.mBytesPerPacket = in_asbd.mBytesPerFrame = sizeof(float);
    in_asbd.mReserved = 0;

    // Request AAC-LC encoding, with no downmixing or downsampling.
    AudioStreamBasicDescription out_asbd;
    memset(&out_asbd, 0, sizeof(AudioStreamBasicDescription));
    out_asbd.mSampleRate = sampling_rate;
    out_asbd.mFormatID = kAudioFormatMPEG4AAC;
    out_asbd.mChannelsPerFrame = num_channels_;
    UInt32 prop_size = sizeof(out_asbd);
    if (AudioFormatGetProperty(kAudioFormatProperty_FormatInfo,
                               0,
                               nullptr,
                               &prop_size,
                               &out_asbd) != noErr) {
      return false;
    }

    if (AudioConverterNew(&in_asbd, &out_asbd, &converter_) != noErr) {
      return false;
    }

    // The converter will fully specify the output format and update the
    // relevant fields of the structure, which we can now query.
    prop_size = sizeof(out_asbd);
    if (AudioConverterGetProperty(converter_,
                                  kAudioConverterCurrentOutputStreamDescription,
                                  &prop_size,
                                  &out_asbd) != noErr) {
      return false;
    }

    // If bitrate is <= 0, allow the encoder to pick a suitable value.
    // Otherwise, set the bitrate (which can fail if the value is not suitable
    // or compatible with the output sampling rate or channels).
    if (bitrate > 0) {
      prop_size = sizeof(int);
      if (AudioConverterSetProperty(
              converter_, kAudioConverterEncodeBitRate, prop_size, &bitrate) !=
          noErr) {
        return false;
      }
    }

    // Figure out the maximum size of an access unit that the encoder can
    // produce. |mBytesPerPacket| will be 0 for variable size configurations,
    // in which case we must query the value.
    uint32_t max_access_unit_size = out_asbd.mBytesPerPacket;
    if (max_access_unit_size == 0) {
      prop_size = sizeof(max_access_unit_size);
      if (AudioConverterGetProperty(
              converter_,
              kAudioConverterPropertyMaximumOutputPacketSize,
              &prop_size,
              &max_access_unit_size) != noErr) {
        return false;
      }
    }

    // This is the only location where the implementation modifies
    // |max_access_unit_size_|.
    max_access_unit_size_ = max_access_unit_size;

    // Allocate a buffer to store one access unit. This is the only location
    // where the implementation modifies |access_unit_buffer_|.
    access_unit_buffer_.reset(new uint8_t[max_access_unit_size]);

    // Initialize the converter ABL. Note that the buffer size has to be set
    // before every encode operation, since the field is modified to indicate
    // the size of the output data (on input it indicates the buffer capacity).
    converter_abl_.mNumberBuffers = 1;
    converter_abl_.mBuffers[0].mNumberChannels = num_channels_;
    converter_abl_.mBuffers[0].mData = access_unit_buffer_.get();

    // The "magic cookie" is an encoder state vector required for decoding and
    // packetization. It is queried now from |converter_| then set on |file_|
    // after initialization.
    UInt32 cookie_size;
    if (AudioConverterGetPropertyInfo(converter_,
                                      kAudioConverterCompressionMagicCookie,
                                      &cookie_size,
                                      nullptr) != noErr) {
      return false;
    }
    std::unique_ptr<uint8_t[]> cookie_data(new uint8_t[cookie_size]);
    if (AudioConverterGetProperty(converter_,
                                  kAudioConverterCompressionMagicCookie,
                                  &cookie_size,
                                  cookie_data.get()) != noErr) {
      return false;
    }

    if (AudioFileInitializeWithCallbacks(this,
                                         &FileReadCallback,
                                         &FileWriteCallback,
                                         &FileGetSizeCallback,
                                         &FileSetSizeCallback,
                                         kAudioFileAAC_ADTSType,
                                         &out_asbd,
                                         0,
                                         &file_) != noErr) {
      return false;
    }

    if (AudioFileSetProperty(file_,
                             kAudioFilePropertyMagicCookieData,
                             cookie_size,
                             cookie_data.get()) != noErr) {
      return false;
    }

    // Initially the input bus points to the input buffer. See the comment on
    // |input_bus_| for more on this optimization.
    input_bus_->set_frames(kAccessUnitSamples);
    for (int ch = 0; ch < input_buffer_->channels(); ++ch) {
      input_bus_->SetChannelData(ch, input_buffer_->channel(ch));
    }

    return true;
  }

  // Makes |num_samples| samples of |audio_bus| available to the encoder,
  // either by aliasing the caller's data (whole, aligned access units) or by
  // copying into |input_buffer_|.
  void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
                                 int source_offset,
                                 int buffer_fill_offset,
                                 int num_samples) final {
    DCHECK_EQ(audio_bus->channels(), input_buffer_->channels());

    // See the comment on |input_bus_| for more on this optimization. Note that
    // we cannot elide the copy if the source offset would result in an
    // unaligned pointer.
    if (num_samples == kAccessUnitSamples &&
        source_offset * sizeof(float) % AudioBus::kChannelAlignment == 0) {
      DCHECK_EQ(buffer_fill_offset, 0);
      for (int ch = 0; ch < audio_bus->channels(); ++ch) {
        auto* samples = const_cast<float*>(audio_bus->channel(ch));
        input_bus_->SetChannelData(ch, samples + source_offset);
      }
      return;
    }

    // Copy the samples into the input buffer.
    DCHECK_EQ(input_bus_->channel(0), input_buffer_->channel(0));
    audio_bus->CopyPartialFramesTo(
        source_offset, num_samples, buffer_fill_offset, input_buffer_.get());
  }

  // Encodes one access unit from |input_bus_| and appends it, ADTS-wrapped,
  // to |out|. Returns false if either the conversion or the packetization
  // fails or produces no packet.
  bool EncodeFromFilledBuffer(std::string* out) final {
    // Reset the buffer size field to the buffer capacity.
    converter_abl_.mBuffers[0].mDataByteSize = max_access_unit_size_;

    // Encode the current input buffer. This is a synchronous call.
    OSStatus oserr;
    UInt32 io_num_packets = 1;
    AudioStreamPacketDescription packet_description;
    oserr = AudioConverterFillComplexBuffer(converter_,
                                            &ConverterFillDataCallback,
                                            this,
                                            &io_num_packets,
                                            &converter_abl_,
                                            &packet_description);
    if (oserr != noErr || io_num_packets == 0) {
      return false;
    }

    // Reserve space in the output buffer to write the packet.
    out->reserve(packet_description.mDataByteSize + kAdtsHeaderSize);

    // Set the current output buffer and emit an ADTS-wrapped AAC access unit.
    // This is a synchronous call. After it returns, reset the output buffer.
    output_buffer_ = out;
    oserr = AudioFileWritePackets(file_,
                                  false,
                                  converter_abl_.mBuffers[0].mDataByteSize,
                                  &packet_description,
                                  num_access_units_,
                                  &io_num_packets,
                                  converter_abl_.mBuffers[0].mData);
    output_buffer_ = nullptr;
    if (oserr != noErr || io_num_packets == 0) {
      return false;
    }
    num_access_units_ += io_num_packets;
    return true;
  }

  // The |AudioConverterFillComplexBuffer| input callback function. Configures
  // the provided |AudioBufferList| to alias |input_bus_|. The implementation
  // can only supply |kAccessUnitSamples| samples as a result of not copying
  // samples or tracking read and write positions. Note that this function is
  // called synchronously by |AudioConverterFillComplexBuffer|.
  static OSStatus ConverterFillDataCallback(
      AudioConverterRef in_converter,
      UInt32* io_num_packets,
      AudioBufferList* io_data,
      AudioStreamPacketDescription** out_packet_desc,
      void* in_encoder) {
    DCHECK(in_encoder);
    // |in_encoder| is the |this| pointer handed to
    // AudioConverterFillComplexBuffer() by EncodeFromFilledBuffer().
    auto* encoder = static_cast<AppleAacImpl*>(in_encoder);
    auto* input_buffer = encoder->input_buffer_.get();
    auto* input_bus = encoder->input_bus_.get();

    DCHECK_EQ(static_cast<int>(*io_num_packets), kAccessUnitSamples);
    DCHECK_EQ(io_data->mNumberBuffers,
              static_cast<unsigned>(input_bus->channels()));
    for (int i_buf = 0, end = io_data->mNumberBuffers; i_buf < end; ++i_buf) {
      io_data->mBuffers[i_buf].mNumberChannels = 1;
      io_data->mBuffers[i_buf].mDataByteSize = sizeof(float) * *io_num_packets;
      io_data->mBuffers[i_buf].mData = input_bus->channel(i_buf);

      // Reset the input bus back to the input buffer. See the comment on
      // |input_bus_| for more on this optimization.
      input_bus->SetChannelData(i_buf, input_buffer->channel(i_buf));
    }
    return noErr;
  }

  // The AudioFile read callback function.
  static OSStatus FileReadCallback(void* in_encoder,
                                   SInt64 in_position,
                                   UInt32 in_size,
                                   void* in_buffer,
                                   UInt32* out_size) {
    // This class only does writing.
    NOTREACHED();
    return kAudioFileNotOpenError;
  }

  // The AudioFile write callback function. Appends the data to the encoder's
  // current |output_buffer_|.
  static OSStatus FileWriteCallback(void* in_encoder,
                                    SInt64 in_position,
                                    UInt32 in_size,
                                    const void* in_buffer,
                                    UInt32* out_size) {
    DCHECK(in_encoder);
    DCHECK(in_buffer);
    // |in_encoder| is the |this| pointer registered with
    // AudioFileInitializeWithCallbacks() in Initialize().
    auto* encoder = static_cast<const AppleAacImpl*>(in_encoder);
    auto* buffer = static_cast<const std::string::value_type*>(in_buffer);

    std::string* const output_buffer = encoder->output_buffer_;
    DCHECK(output_buffer);

    output_buffer->append(buffer, in_size);
    *out_size = in_size;
    return noErr;
  }

  // The AudioFile getsize callback function.
  static SInt64 FileGetSizeCallback(void* in_encoder) {
    // This class only does writing.
    NOTREACHED();
    return 0;
  }

  // The AudioFile setsize callback function.
  static OSStatus FileSetSizeCallback(void* in_encoder, SInt64 in_size) {
    return noErr;
  }

  // Buffer that holds one AAC access unit worth of samples. The input callback
  // function provides samples from this buffer via |input_bus_| to the encoder.
  const std::unique_ptr<AudioBus> input_buffer_;

  // Wrapper AudioBus used by the input callback function. Normally it wraps
  // |input_buffer_|. However, as an optimization when the client submits a
  // buffer containing exactly one access unit worth of samples, the bus is
  // redirected to the client buffer temporarily. We know that the base
  // implementation will call us right after to encode the buffer and thus we
  // can eliminate the copy into |input_buffer_|.
  const std::unique_ptr<AudioBus> input_bus_;

  // A buffer that holds one AAC access unit. Initialized in |Initialize| once
  // the maximum access unit size is known. Not const because it is assigned
  // after construction; only |Initialize| modifies it.
  std::unique_ptr<uint8_t[]> access_unit_buffer_;

  // The maximum size of an access unit that the encoder can emit. Not const
  // because it is assigned after construction; only |Initialize| modifies it.
  uint32_t max_access_unit_size_;

  // A temporary pointer to the current output buffer. Only non-null when
  // writing an access unit. Accessed by the AudioFile write callback function.
  std::string* output_buffer_;

  // The |AudioConverter| is responsible for AAC encoding. This is a Core Audio
  // object, not to be confused with |media::AudioConverter|.
  AudioConverterRef converter_;

  // The |AudioFile| is responsible for ADTS packetization.
  AudioFileID file_;

  // An |AudioBufferList| passed to the converter to store encoded samples.
  AudioBufferList converter_abl_;

  // The number of access units emitted so far by the encoder.
  uint64_t num_access_units_;

  DISALLOW_COPY_AND_ASSIGN(AppleAacImpl);
};
#endif // defined(OS_MAC)
// "Encodes" audio as uncompressed, interleaved, big-endian 16-bit PCM in
// 10 ms frames.
class AudioEncoder::Pcm16Impl final : public AudioEncoder::ImplBase {
 public:
  // Allocates the frame buffer and marks the encoder initialized, unless the
  // base class rejected the configuration.
  Pcm16Impl(const scoped_refptr<CastEnvironment>& cast_environment,
            int num_channels,
            int sampling_rate,
            FrameEncodedCallback callback)
      : ImplBase(cast_environment,
                 CODEC_AUDIO_PCM16,
                 num_channels,
                 sampling_rate,
                 sampling_rate / kDefaultFramesPerSecond, /* 10 ms frames */
                 std::move(callback)),
        // Only size the sample buffer from the stream parameters once the base
        // class has accepted them: a rejected configuration may have a
        // non-positive channel or sample count, which is not a valid array
        // length.
        buffer_(
            new int16_t[ImplBase::operational_status_ == STATUS_UNINITIALIZED
                            ? num_channels * samples_per_frame_
                            : 0]) {
    if (ImplBase::operational_status_ != STATUS_UNINITIALIZED)
      return;
    operational_status_ = STATUS_INITIALIZED;
  }

 private:
  ~Pcm16Impl() final = default;

  // Interleaves |num_samples| samples from |audio_bus|, converted to signed
  // 16-bit integers, into |buffer_| at sample position |buffer_fill_offset|.
  void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
                                 int source_offset,
                                 int buffer_fill_offset,
                                 int num_samples) final {
    audio_bus->ToInterleavedPartial<SignedInt16SampleTypeTraits>(
        source_offset, num_samples,
        buffer_.get() + buffer_fill_offset * num_channels_);
  }

  // Writes the contents of |buffer_| into |out|. Always returns true.
  bool EncodeFromFilledBuffer(std::string* out) final {
    // Output 16-bit PCM integers in big-endian byte order.
    const size_t num_values =
        static_cast<size_t>(num_channels_ * samples_per_frame_);
    out->resize(num_values * sizeof(int16_t));
    const int16_t* src = buffer_.get();
    const int16_t* const src_end = src + num_values;
    // Copy each value byte-wise: the string's character storage is not
    // guaranteed to be suitably aligned for, or accessible as, uint16_t.
    char* dest = &(*out)[0];
    for (; src < src_end; ++src, dest += sizeof(uint16_t)) {
      const uint16_t big_endian_value = base::HostToNet16(*src);
      memcpy(dest, &big_endian_value, sizeof(big_endian_value));
    }
    return true;
  }

  // Holds one frame of interleaved 16-bit samples awaiting output.
  const std::unique_ptr<int16_t[]> buffer_;

  DISALLOW_COPY_AND_ASSIGN(Pcm16Impl);
};
// Creates the codec-specific implementation for |codec|. |impl_| is left
// unset when |codec| is not handled on this platform.
AudioEncoder::AudioEncoder(
    const scoped_refptr<CastEnvironment>& cast_environment,
    int num_channels,
    int sampling_rate,
    int bitrate,
    Codec codec,
    FrameEncodedCallback frame_encoded_callback)
    : cast_environment_(cast_environment) {
  // Any thread may construct the AudioEncoder; what matters is that every
  // InsertAudio() call is made by one and the same thread.
  insert_thread_checker_.DetachFromThread();

#if !defined(OS_IOS)
  if (codec == CODEC_AUDIO_OPUS) {
    impl_ = new OpusImpl(cast_environment, num_channels, sampling_rate,
                         bitrate, std::move(frame_encoded_callback));
    return;
  }
#endif

#if defined(OS_MAC)
  if (codec == CODEC_AUDIO_AAC) {
    impl_ = new AppleAacImpl(cast_environment, num_channels, sampling_rate,
                             bitrate, std::move(frame_encoded_callback));
    return;
  }
#endif  // defined(OS_MAC)

  if (codec == CODEC_AUDIO_PCM16) {
    impl_ = new Pcm16Impl(cast_environment, num_channels, sampling_rate,
                          std::move(frame_encoded_callback));
    return;
  }

  NOTREACHED() << "Unsupported or unspecified codec for audio encoder";
}
// Nothing to clean up explicitly. Note that InsertAudio() binds |impl_|
// itself (not |this|) into each EncodeAudio task it posts.
AudioEncoder::~AudioEncoder() = default;
// Reports the implementation's initialization status, or
// STATUS_UNSUPPORTED_CODEC when no implementation was created.
OperationalStatus AudioEncoder::InitializationResult() const {
  DCHECK(insert_thread_checker_.CalledOnValidThread());
  if (!impl_.get())
    return STATUS_UNSUPPORTED_CODEC;
  return impl_->InitializationResult();
}
// Returns the number of samples per encoded frame. Must only be called on a
// successfully initialized encoder; otherwise the maximum int is returned.
int AudioEncoder::GetSamplesPerFrame() const {
  DCHECK(insert_thread_checker_.CalledOnValidThread());
  const bool is_initialized = InitializationResult() == STATUS_INITIALIZED;
  if (is_initialized)
    return impl_->samples_per_frame();
  NOTREACHED();
  return std::numeric_limits<int>::max();
}
// Returns the duration of one encoded frame. Must only be called on a
// successfully initialized encoder; otherwise a zero duration is returned.
base::TimeDelta AudioEncoder::GetFrameDuration() const {
  DCHECK(insert_thread_checker_.CalledOnValidThread());
  const bool is_initialized = InitializationResult() == STATUS_INITIALIZED;
  if (is_initialized)
    return impl_->frame_duration();
  NOTREACHED();
  return base::TimeDelta();
}
// Hands |audio_bus| and its |recorded_time| to the implementation by posting
// an EncodeAudio task to the AUDIO thread. Does nothing (beyond NOTREACHED)
// if the encoder is not initialized.
void AudioEncoder::InsertAudio(std::unique_ptr<AudioBus> audio_bus,
                               const base::TimeTicks& recorded_time) {
  DCHECK(insert_thread_checker_.CalledOnValidThread());
  DCHECK(audio_bus.get());

  const OperationalStatus status = InitializationResult();
  if (status == STATUS_INITIALIZED) {
    cast_environment_->PostTask(
        CastEnvironment::AUDIO, FROM_HERE,
        base::BindOnce(&AudioEncoder::ImplBase::EncodeAudio, impl_,
                       std::move(audio_bus), recorded_time));
    return;
  }
  NOTREACHED();
}
} // namespace cast
} // namespace media