blob: 51206e0662cb0cc5299ae855b8d5f26e6d00e245 [file] [log] [blame]
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "starboard/shared/starboard/microphone/microphone_internal.h"
// Windows headers.
#include <collection.h>
#include <MemoryBuffer.h>
#include <ppltasks.h>
// C++ headers.
#include <algorithm>
#include <deque>
#include <limits>
#include <memory>
#include <sstream>
#include <vector>
#include "starboard/atomic.h"
#include "starboard/common/semaphore.h"
#include "starboard/log.h"
#include "starboard/mutex.h"
#include "starboard/shared/uwp/app_accessors.h"
#include "starboard/shared/uwp/application_uwp.h"
#include "starboard/shared/uwp/async_utils.h"
#include "starboard/shared/win32/error_utils.h"
#include "starboard/shared/win32/simple_thread.h"
#include "starboard/shared/win32/wchar_utils.h"
#include "starboard/string.h"
#include "starboard/time.h"
#include "starboard/user.h"
#if !SB_HAS(MICROPHONE)
#error Microphone expected to be enabled when compiling a microphone impl.
#endif
using concurrency::task_continuation_context;
using Microsoft::WRL::ComPtr;
using starboard::Mutex;
using starboard::scoped_ptr;
using starboard::ScopedLock;
using starboard::Semaphore;
using starboard::shared::uwp::ApplicationUwp;
using starboard::shared::win32::CheckResult;
using starboard::shared::win32::platformStringToString;
using Windows::Devices::Enumeration::DeviceInformation;
using Windows::Devices::Enumeration::DeviceInformationCollection;
using Windows::Foundation::EventRegistrationToken;
using Windows::Foundation::IMemoryBufferByteAccess;
using Windows::Foundation::IMemoryBufferReference;
using Windows::Foundation::TypedEventHandler;
using Windows::Media::Audio::AudioDeviceInputNode;
using Windows::Media::Audio::AudioDeviceNodeCreationStatus;
using Windows::Media::Audio::AudioFrameOutputNode;
using Windows::Media::Audio::AudioGraph;
using Windows::Media::Audio::AudioGraphCreationStatus;
using Windows::Media::Audio::AudioGraphSettings;
using Windows::Media::Audio::CreateAudioDeviceInputNodeResult;
using Windows::Media::Audio::CreateAudioGraphResult;
using Windows::Media::Audio::QuantumSizeSelectionMode;
using Windows::Media::AudioBuffer;
using Windows::Media::AudioBufferAccessMode;
using Windows::Media::AudioFrame;
using Windows::Media::Capture::MediaCategory;
using Windows::Media::Devices::MediaDevice;
using Windows::Media::MediaProperties::AudioEncodingProperties;
using Windows::Media::Render::AudioRenderCategory;
namespace {
// Sample-rate bounds (in Hz) enforced by IsMicrophoneSampleRateSupported() and
// CreateMicrophone(); kMaxSampleRate is also the rate reported by
// GetAvailableMicrophones().
// It appears that cobalt will only request 16khz.
const int kMinSampleRate = 16000;
const int kMaxSampleRate = 44100;
// Channel count passed to AudioEncodingProperties::CreatePcm().
const int kNumChannels = 1;
// Size in bytes of one output sample (int16_t); used to convert between byte
// counts and sample counts.
const int kOutputBytesPerSample = sizeof(int16_t);
// Minimum read size reported through SbMicrophoneInfo.
const int kMinReadSizeBytes = 4096;
// Value assigned to OutgoingGain on every created device input node.
const int kMicGain = 1;
// Controls the amount of time that a microphone will record muted audio
// before it signals a read error. Without this trigger, the app
// will continuously wait for audio data. This happens with the Kinect
// device, which when disconnected will still record 0-value samples.
const SbTime kTimeMutedThreshold = 3 * kSbTimeSecond;
// Maps [-1.0f, 1.0f] -> [-32768, 32767].
// Values outside of [-1.0f, 1.0f] (including +/-infinity) are clamped.
// Zero (of either sign) and NaN map to 0; NaN must never reach the
// float -> int16_t cast because that conversion is undefined behavior.
int16_t To16BitPcm(float val) {
  constexpr float kMaxFloatValue = std::numeric_limits<int16_t>::max();
  constexpr float kLowFloatValue = std::numeric_limits<int16_t>::lowest();
  if (val > 0.0f) {
    const float clamped = (val > 1.0f) ? 1.0f : val;
    return static_cast<int16_t>(clamped * kMaxFloatValue);
  }
  if (val < 0.0f) {
    const float clamped = (val < -1.0f) ? -1.0f : val;
    // Negative samples scale by 32768 so that -1.0f reaches the full range.
    return static_cast<int16_t>(-1.0f * clamped * kLowFloatValue);
  }
  // Neither comparison held: |val| is +0, -0 or NaN.
  return 0;
}
// Returns a human readable name for |status|, intended for log output.
// Any value that is not one of the known enumerators yields "Unknown".
const char* ToString(AudioDeviceNodeCreationStatus status) {
  if (status == AudioDeviceNodeCreationStatus::AccessDenied) {
    return "AccessDenied";
  }
  if (status == AudioDeviceNodeCreationStatus::DeviceNotAvailable) {
    return "DeviceNotAvailable";
  }
  if (status == AudioDeviceNodeCreationStatus::FormatNotSupported) {
    return "FormatNotSupported";
  }
  if (status == AudioDeviceNodeCreationStatus::Success) {
    return "Success";
  }
  if (status == AudioDeviceNodeCreationStatus::UnknownFailure) {
    return "UnknownFailure";
  }
  return "Unknown";
}
// Returns the HasThreadAccess property of the application's dispatcher, i.e.
// whether the calling thread is the UI thread.
bool IsUiThread() {
  return starboard::shared::uwp::GetDispatcher()->HasThreadAccess;
}
std::vector<DeviceInformation^> GetAllMicrophoneDevices() {
std::vector<DeviceInformation^> output;
Platform::String^ audio_str = MediaDevice::GetAudioCaptureSelector();
DeviceInformationCollection^ all_devices =
starboard::shared::uwp::WaitForResult(
DeviceInformation::FindAllAsync(audio_str));
for (DeviceInformation^ dev_info : all_devices) {
output.push_back(dev_info);
}
return output;
}
// Creates an AudioGraph for |category| using |selection_mode| as the quantum
// size selection mode, waiting for the asynchronous creation to complete.
// Creation failure is only checked through SB_DCHECK; the Graph property of
// the result is returned unconditionally.
AudioGraph^ CreateAudioGraph(AudioRenderCategory category,
                             QuantumSizeSelectionMode selection_mode) {
  AudioGraphSettings^ graph_settings = ref new AudioGraphSettings(category);
  graph_settings->QuantumSizeSelectionMode = selection_mode;

  CreateAudioGraphResult^ creation = starboard::shared::uwp::WaitForResult(
      AudioGraph::CreateAsync(graph_settings));
  SB_DCHECK(creation->Status == AudioGraphCreationStatus::Success);
  return creation->Graph;
}
// Tries to create one device input node in |graph| for every entry of
// |microphone_devices|, using |encoding_properties| and the Speech media
// category. Devices whose node creation does not report Success are logged
// and skipped. Every returned node has ConsumeInput enabled and its outgoing
// gain set to kMicGain.
std::vector<AudioDeviceInputNode^> GenerateAudioInputNodes(
    const std::vector<DeviceInformation^>& microphone_devices,
    AudioEncodingProperties^ encoding_properties,
    AudioGraph^ graph) {
  std::vector<AudioDeviceInputNode^> created_nodes;
  for (DeviceInformation^ device : microphone_devices) {
    CreateAudioDeviceInputNodeResult^ creation =
        starboard::shared::uwp::WaitForResult(
            graph->CreateDeviceInputNodeAsync(MediaCategory::Speech,
                                              encoding_properties, device));
    const auto creation_status = creation->Status;
    AudioDeviceInputNode^ node = creation->DeviceInputNode;

    if (creation_status == AudioDeviceNodeCreationStatus::Success) {
      SB_LOG(INFO) << "Created a microphone with device \""
                   << platformStringToString(device->Name) << "\"";
      node->ConsumeInput = true;
      node->OutgoingGain = kMicGain;
      created_nodes.push_back(node);
    } else {
      SB_LOG(INFO) << "Failed to create microphone with device name \""
                   << platformStringToString(device->Name) << "\" because "
                   << ToString(creation_status);
    }
  }
  return created_nodes;
}
// Pulls the current frame from |node| and appends its contents to
// |destination|. The frame's byte buffer is reinterpreted as an array of
// floats to match the element type of the destination vector; the whole
// capacity reported by GetBuffer() is copied.
void ExtractRawAudioData(AudioFrameOutputNode^ node,
                         std::vector<float>* destination) {
  AudioFrame^ frame = node->GetFrame();
  AudioBuffer^ locked_buffer = frame->LockBuffer(AudioBufferAccessMode::Read);
  IMemoryBufferReference^ buffer_reference = locked_buffer->CreateReference();

  // The raw bytes are only reachable through the IMemoryBufferByteAccess COM
  // interface of the buffer reference.
  ComPtr<IMemoryBufferByteAccess> byte_access;
  CheckResult(reinterpret_cast<IInspectable*>(buffer_reference)
                  ->QueryInterface(IID_PPV_ARGS(&byte_access)));

  BYTE* raw_bytes = nullptr;
  UINT32 byte_capacity = 0;
  CheckResult(byte_access->GetBuffer(&raw_bytes, &byte_capacity));

  // Audio data is float data, so the buffer must be a multiple of 4.
  SB_DCHECK(byte_capacity % sizeof(float) == 0);
  if (byte_capacity == 0) {
    return;
  }

  float* first_sample = reinterpret_cast<float*>(raw_bytes);
  float* past_last_sample = first_sample + byte_capacity / sizeof(float);
  destination->insert(destination->end(), first_sample, past_last_sample);
}
// Small state machine used to detect that the microphone has produced only
// muted audio for a certain amount of time.
//
// The timer starts on the first SignalMuted() received while still in the
// initial state. Any SignalSound() moves the trigger to kFoundSound, after
// which later SignalMuted() calls are ignored and IsMuted() stays false.
class MutedTrigger {
 public:
  // Records the start of a muted period; only has an effect in the initial
  // state.
  void SignalMuted() {
    if (state_ == kInitialized) {
      time_start_ = SbTimeGetMonotonicNow();
      state_ = kIsMuted;
    }
  }

  // Records that non-muted audio has been observed.
  void SignalSound() { state_ = kFoundSound; }

  // Returns true when the trigger is in the muted state and more than
  // |duration_threshold| has elapsed since the muted period started.
  bool IsMuted(SbTimeMonotonic duration_threshold) const {
    return state_ == kIsMuted &&
           (SbTimeGetMonotonicNow() - time_start_) > duration_threshold;
  }

 private:
  enum State {
    kInitialized,
    kIsMuted,
    kFoundSound
  };

  State state_ = kInitialized;
  SbTimeMonotonic time_start_ = 0;
};
// MicrophoneProcessor encapsulates Microsoft's audio api. All available
// microphones are queried and instantiated. This class will mix the audio
// together into one signed 16-bit pcm stream.
//
// When the microphone is created it will find all available microphones and
// immediately start recording. A callback will be created which will process
// audio data when new samples are available. The Microphone will stop
// recording when Close() is called.
//
// Read(), Close() (for stopping the graph) and the quantum callback all
// serialize on |mutex_|.
ref class MicrophoneProcessor sealed {
 public:
  // This will try and create a microphone. This will fail (return null) if
  // there are no available microphones, or if no input node could be created
  // for any of them.
  //
  // |max_num_samples| bounds the number of buffered output samples;
  // |sample_rate| is forwarded to the PCM encoding properties.
  static MicrophoneProcessor^ TryCreateAndStartRecording(
      size_t max_num_samples,
      int sample_rate) {
    std::vector<DeviceInformation^> microphone_devices =
        GetAllMicrophoneDevices();
    if (microphone_devices.empty()) {  // Unexpected condition.
      return nullptr;
    }
    MicrophoneProcessor^ output = ref new MicrophoneProcessor(
        max_num_samples, sample_rate, microphone_devices);
    if (output->input_nodes_.empty()) {
      output = nullptr;
    }
    return output;
  }

  virtual ~MicrophoneProcessor() {
  }

  // Unregisters the quantum callback and stops the audio graph.
  void Close() {
    audio_graph_->QuantumStarted -= removal_token_;
    ScopedLock lock(mutex_);
    audio_graph_->Stop();
  }

  // Copies up to |out_audio_count| buffered samples into |out_audio_data| and
  // removes them from the internal buffer.
  // Returns the number of elements that have been written, or -1 if there
  // was a read error (the microphone has been muted for longer than
  // kTimeMutedThreshold).
  int Read(int16_t* out_audio_data, size_t out_audio_count) {
    ScopedLock lock(mutex_);
    if (muted_timer_.IsMuted(kTimeMutedThreshold)) {
      return -1;
    }
    out_audio_count = std::min(out_audio_count, pcm_audio_data_.size());
    using iter = std::vector<int16_t>::iterator;
    iter it_begin = pcm_audio_data_.begin();
    iter it_end = pcm_audio_data_.begin() + out_audio_count;
    std::copy(it_begin, it_end, out_audio_data);
    pcm_audio_data_.erase(it_begin, it_end);
    return static_cast<int>(out_audio_count);
  }

 private:
  // Builds the audio graph with one input node and one frame output node per
  // usable device, registers the quantum callback and starts the graph.
  //
  // If the graph cannot be created, or no input node can be created, the
  // object is left with an empty |input_nodes_| and nothing is registered or
  // started; TryCreateAndStartRecording() then discards it.
  explicit MicrophoneProcessor(
      size_t max_num_samples,
      int sample_rate,
      const std::vector<DeviceInformation^>& microphone_devices)
      : max_num_samples_(max_num_samples) {
    audio_graph_ = CreateAudioGraph(AudioRenderCategory::Speech,
                                    QuantumSizeSelectionMode::SystemDefault);
    SB_DCHECK(audio_graph_);
    if (audio_graph_ == nullptr) {
      // SB_DCHECK may be compiled out; never dereference a null graph.
      return;
    }
    // NOTE(review): 16 bits per sample are requested here, yet
    // ExtractRawAudioData() reinterprets the frame bytes as 32-bit floats.
    // Confirm that frame output nodes always deliver float samples.
    wave_encoder_ =
        AudioEncodingProperties::CreatePcm(sample_rate, kNumChannels, 16);
    input_nodes_ = GenerateAudioInputNodes(microphone_devices, wave_encoder_,
                                           audio_graph_);
    if (input_nodes_.empty()) {
      // Nothing to record from: do not register a callback or start a graph
      // for an object that is about to be discarded.
      return;
    }
    for (AudioDeviceInputNode^ input_node : input_nodes_) {
      AudioFrameOutputNode^ audio_frame_node =
          audio_graph_->CreateFrameOutputNode(wave_encoder_);
      audio_frame_node->ConsumeInput = true;
      input_node->AddOutgoingConnection(audio_frame_node);
      // Construct the owner first so that the buffer cannot leak if the
      // vector has to reallocate and throws.
      audio_channel_.push_back(
          std::unique_ptr<std::vector<float>>(new std::vector<float>()));
      audio_frame_nodes_.push_back(audio_frame_node);
    }
    // Update the audio data whenever a new audio sample has been finished.
    removal_token_ =
        audio_graph_->QuantumStarted +=
            ref new TypedEventHandler<AudioGraph^, Object^>(
                this, &MicrophoneProcessor::OnQuantumStarted);
    audio_graph_->Start();
  }

  // Handler for AudioGraph::QuantumStarted.
  void OnQuantumStarted(AudioGraph^, Object^) {
    Process();
  }

  // Drains every frame output node into its per-device float buffer, then
  // mixes the samples available on all devices into |pcm_audio_data_|.
  void Process() {
    ScopedLock lock(mutex_);
    if (audio_frame_nodes_.empty()) {
      return;
    }
    for (size_t i = 0; i < audio_frame_nodes_.size(); ++i) {
      ExtractRawAudioData(audio_frame_nodes_[i], audio_channel_[i].get());
    }
    // Only as many samples as every device can provide are mixed, capped at
    // the output buffer limit.
    size_t num_elements = max_num_samples_;
    for (const auto& audio_datum : audio_channel_) {
      num_elements = std::min(audio_datum->size(), num_elements);
    }
    if (num_elements == 0) {
      return;
    }
    bool is_muted = true;
    // Mix all available audio channels together and convert to output buffer
    // format. Detect if audio is muted.
    for (size_t i = 0; i < num_elements; ++i) {
      float mixed_sample = 0.0f;
      for (const auto& audio_datum : audio_channel_) {
        const float sample = (*audio_datum)[i];
        if (sample != 0.0f) {
          is_muted = false;
        }
        mixed_sample += sample;
      }
      pcm_audio_data_.push_back(To16BitPcm(mixed_sample));
    }
    // Trim the oldest values from finished pcm data if the buffer has
    // exceeded its allowed size.
    if (pcm_audio_data_.size() > max_num_samples_) {
      const size_t num_delete = pcm_audio_data_.size() - max_num_samples_;
      pcm_audio_data_.erase(pcm_audio_data_.begin(),
                            pcm_audio_data_.begin() + num_delete);
    }
    if (is_muted) {
      muted_timer_.SignalMuted();
    } else {
      muted_timer_.SignalSound();
    }
    // Trim values from source channels that were just transferred to
    // pcm_audio_data_.
    for (const auto& audio_datum : audio_channel_) {
      audio_datum->erase(audio_datum->begin(),
                         audio_datum->begin() + num_elements);
    }
  }

  AudioGraph^ audio_graph_ = nullptr;
  AudioEncodingProperties^ wave_encoder_;
  // One input node per successfully opened device.
  std::vector<AudioDeviceInputNode^> input_nodes_;
  // Frame output node connected to the input node at the same index.
  std::vector<AudioFrameOutputNode^> audio_frame_nodes_;
  // Pending float samples for the frame node at the same index.
  std::vector<std::unique_ptr<std::vector<float>>> audio_channel_;
  // Mixed 16-bit output waiting to be consumed by Read().
  std::vector<int16_t> pcm_audio_data_;
  // Token needed to unregister the QuantumStarted handler.
  EventRegistrationToken removal_token_;
  size_t max_num_samples_ = 0;
  MutedTrigger muted_timer_;
  Mutex mutex_;
};
// Implements the SbMicrophonePrivate interface on top of MicrophoneProcessor.
// The processor is created lazily by Open() and released by Close().
class MicrophoneImpl : public SbMicrophonePrivate {
 public:
  // |sample_rate| is forwarded to the processor when it is opened;
  // |buffer_size_bytes| determines how many 16-bit samples may be buffered.
  MicrophoneImpl(int sample_rate, int buffer_size_bytes)
      : buffer_size_bytes_(buffer_size_bytes),
        sample_rate_(sample_rate) {
  }

  ~MicrophoneImpl() { Close(); }

  // Creates the processor and starts recording if that has not happened yet.
  // Returns false when called from the UI thread or when no processor could
  // be created.
  bool Open() override {
    if (!microphone_.Get()) {
      if (IsUiThread()) {
        SB_LOG(INFO) << "Could not open microphone from UI thread.";
        return false;
      }
      microphone_ = MicrophoneProcessor::TryCreateAndStartRecording(
          buffer_size_bytes_ / kOutputBytesPerSample,
          sample_rate_);
    }
    return microphone_ != nullptr;
  }

  // Stops and releases the processor. Safe to call when the microphone was
  // never opened, failed to open, or has already been closed; the destructor
  // relies on this.
  bool Close() override {
    if (microphone_.Get()) {
      microphone_->Close();
      microphone_ = nullptr;
    }
    return true;
  }

  // Reads up to |audio_data_size| bytes of 16-bit samples into
  // |out_audio_data|. Returns the number of bytes written, or -1 when the
  // microphone is not open, |audio_data_size| is negative, or the processor
  // reports a read error.
  int Read(void* out_audio_data, int audio_data_size) override {
    if (!microphone_.Get()) {
      return -1;
    }
    if (audio_data_size < 0) {
      // A negative size would wrap to a huge sample count below and allow
      // writes past the end of the caller's buffer.
      return -1;
    }
    int16_t* pcm_buffer = static_cast<int16_t*>(out_audio_data);
    const size_t pcm_buffer_count =
        static_cast<size_t>(audio_data_size / kOutputBytesPerSample);
    const int n_samples = microphone_->Read(pcm_buffer, pcm_buffer_count);
    if (n_samples < 0) {
      return -1;  // Is error.
    }
    return n_samples * kOutputBytesPerSample;
  }

 private:
  const int buffer_size_bytes_;
  const int sample_rate_;
  // Null until Open() succeeds and again after Close().
  Platform::Agile<MicrophoneProcessor> microphone_;
};
// Singleton access is required by the microphone interface as specified by
// nplb.
// kSingletonId is the only microphone id this implementation hands out (see
// GetAvailableMicrophones()).
const SbMicrophoneId kSingletonId =
reinterpret_cast<SbMicrophoneId>(0x1);
// Points at the live MicrophoneImpl, or is null when none exists. Set by
// CreateMicrophone() and cleared by DestroyMicrophone().
starboard::atomic_pointer<MicrophoneImpl*> s_singleton_pointer;
} // namespace.
// Reports the available microphones. All capture devices are exposed as one
// singleton microphone, so the result is 0 when no capture device exists and
// 1 otherwise. The singleton's description is written to |out_info_array[0]|
// only when the array is non-null and has room for at least one entry.
int SbMicrophonePrivate::GetAvailableMicrophones(
    SbMicrophoneInfo* out_info_array,
    int info_array_size) {
  const bool has_capture_device = !GetAllMicrophoneDevices().empty();
  if (!has_capture_device) {
    return 0;
  }

  const bool can_store_entry =
      (out_info_array != nullptr) && (info_array_size > 0);
  if (can_store_entry) {
    const SbMicrophoneInfo singleton_info = {
        kSingletonId, kSBMicrophoneAnalogHeadset, kMaxSampleRate,
        kMinReadSizeBytes};
    out_info_array[0] = singleton_info;
  }
  return 1;
}
// Returns true when |id| is a valid microphone id and |sample_rate_in_hz|
// lies within [kMinSampleRate, kMaxSampleRate].
bool SbMicrophonePrivate::IsMicrophoneSampleRateSupported(
    SbMicrophoneId id,
    int sample_rate_in_hz) {
  if (SbMicrophoneIdIsValid(id)) {
    const bool at_least_min = sample_rate_in_hz >= kMinSampleRate;
    const bool at_most_max = sample_rate_in_hz <= kMaxSampleRate;
    return at_least_min && at_most_max;
  }
  return false;
}
// Creates the singleton microphone. Returns kSbMicrophoneInvalid when |id| is
// invalid, when |sample_rate_in_hz| is outside
// [kMinSampleRate, kMaxSampleRate], when |buffer_size_bytes| is not positive
// or is too large, or when a microphone already exists.
SbMicrophone SbMicrophonePrivate::CreateMicrophone(SbMicrophoneId id,
                                                   int sample_rate_in_hz,
                                                   int buffer_size_bytes) {
  if (!SbMicrophoneIdIsValid(id)) {
    return kSbMicrophoneInvalid;
  }

  const bool rate_in_range = (sample_rate_in_hz >= kMinSampleRate) &&
                             (sample_rate_in_hz <= kMaxSampleRate);
  // The upper bound on the buffer size is required to conform to nplb test.
  const bool buffer_size_ok =
      (buffer_size_bytes > 0) &&
      (buffer_size_bytes < (std::numeric_limits<int>::max() - 1));
  if (!rate_in_range || !buffer_size_ok) {
    return kSbMicrophoneInvalid;
  }

  // Id will either be 1 or 0. At this time there is only one microphone.
  SB_DCHECK(id == kSingletonId);
  if (s_singleton_pointer.load()) {
    // Only one microphone may exist at a time.
    return kSbMicrophoneInvalid;
  }

  MicrophoneImpl* created =
      new MicrophoneImpl(sample_rate_in_hz, buffer_size_bytes);
  s_singleton_pointer.store(created);
  return created;
}
// Destroys |microphone|, which is expected to be the singleton returned by
// CreateMicrophone().
void SbMicrophonePrivate::DestroyMicrophone(SbMicrophone microphone) {
  SB_DCHECK(s_singleton_pointer.load() == microphone);
  // Clear the singleton slot before freeing the object so that a new
  // microphone can be created afterwards.
  s_singleton_pointer.store(nullptr);
  delete microphone;
}