blob: 043c9095c1fe1ee93201949d99c95649d788f668 [file] [log] [blame]
// Copyright 2017 The Cobalt Authors. All Rights Reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.
#include "starboard/shared/starboard/microphone/microphone_internal.h"
// Windows headers.
#include <MemoryBuffer.h>
#include <collection.h>
#include <ppltasks.h>
// C++ headers.
#include <algorithm>
#include <deque>
#include <memory>
#include <sstream>
#include <string>
#include <vector>
#include "starboard/common/atomic.h"
#include "starboard/common/log.h"
#include "starboard/common/mutex.h"
#include "starboard/common/semaphore.h"
#include "starboard/common/string.h"
#include "starboard/common/thread.h"
#include "starboard/shared/uwp/app_accessors.h"
#include "starboard/shared/uwp/application_uwp.h"
#include "starboard/shared/uwp/async_utils.h"
#include "starboard/shared/win32/error_utils.h"
#include "starboard/shared/win32/wchar_utils.h"
#include "starboard/time.h"
#include "starboard/user.h"
using concurrency::task_continuation_context;
using Microsoft::WRL::ComPtr;
using starboard::Mutex;
using starboard::scoped_ptr;
using starboard::ScopedLock;
using starboard::Semaphore;
using starboard::shared::uwp::ApplicationUwp;
using starboard::shared::win32::platformStringToString;
using Windows::Devices::Enumeration::DeviceInformation;
using Windows::Devices::Enumeration::DeviceInformationCollection;
using Windows::Foundation::EventRegistrationToken;
using Windows::Foundation::IMemoryBufferByteAccess;
using Windows::Foundation::IMemoryBufferReference;
using Windows::Foundation::TypedEventHandler;
using Windows::Foundation::Uri;
using Windows::Media::AudioBuffer;
using Windows::Media::AudioBufferAccessMode;
using Windows::Media::AudioFrame;
using Windows::Media::Audio::AudioDeviceInputNode;
using Windows::Media::Audio::AudioDeviceNodeCreationStatus;
using Windows::Media::Audio::AudioFrameOutputNode;
using Windows::Media::Audio::AudioGraph;
using Windows::Media::Audio::AudioGraphCreationStatus;
using Windows::Media::Audio::AudioGraphSettings;
using Windows::Media::Audio::CreateAudioDeviceInputNodeResult;
using Windows::Media::Audio::CreateAudioGraphResult;
using Windows::Media::Audio::QuantumSizeSelectionMode;
using Windows::Media::Capture::MediaCategory;
using Windows::Media::Devices::MediaDevice;
using Windows::Media::MediaProperties::AudioEncodingProperties;
using Windows::Media::Render::AudioRenderCategory;
using Windows::System::Launcher;
namespace {
// Inclusive range of sample rates (in Hz) accepted by
// IsMicrophoneSampleRateSupported() and CreateMicrophone().
// It appears that cobalt will only request 16khz.
const int kMinSampleRate = 16000;
const int kMaxSampleRate = 44100;
// Channel count passed to AudioEncodingProperties::CreatePcm().
const int kNumChannels = 1;
// Size in bytes of one output sample; the output stream is signed 16-bit PCM.
const int kOutputBytesPerSample = sizeof(int16_t);
// Value reported to callers through SbMicrophoneInfo::min_read_size.
const int kMinReadSizeBytes = 4096;
// Value assigned to the OutgoingGain of every created input node.
const int kMicGain = 1;
// Controls the amount of time that a microphone will record muted audio
// before it signals a read error. Without this trigger, the app
// will continuously wait for audio data. This happens with the Kinect
// device, which when disconnected will still record 0-value samples.
const SbTime kTimeMutedThreshold = 3 * kSbTimeSecond;
// Converts a normalized float sample to signed 16-bit PCM.
//
// Maps [-1.0f, 1.0f] -> [-32768, 32767]. Values outside of [-1.0f, 1.0f] are
// clamped before scaling. Positive and negative samples are scaled by
// different factors (32767 and 32768 respectively) so that both ends of the
// int16_t range are reachable.
//
// Zero and NaN both produce 0. NaN fails every ordered comparison, so it
// falls through to the final return instead of being cast to an integer,
// which would be undefined behavior.
int16_t To16BitPcm(float val) {
  constexpr float kMaxFloatValue = std::numeric_limits<int16_t>::max();
  constexpr float kLowFloatValue = std::numeric_limits<int16_t>::lowest();
  if (val > 0.0f) {
    return static_cast<int16_t>(std::min(val, 1.0f) * kMaxFloatValue);
  }
  if (val < 0.0f) {
    // kLowFloatValue is negative, so the magnitude (-val) is scaled by it to
    // land back on the negative side of the range.
    return static_cast<int16_t>(-1.0f * std::max(val, -1.0f) * kLowFloatValue);
  }
  return 0;
}
// Returns a printable name for |status|. Any value that is not one of the
// enumerators handled below yields "Unknown".
const char* ToString(AudioDeviceNodeCreationStatus status) {
  if (status == AudioDeviceNodeCreationStatus::AccessDenied) {
    return "AccessDenied";
  }
  if (status == AudioDeviceNodeCreationStatus::DeviceNotAvailable) {
    return "DeviceNotAvailable";
  }
  if (status == AudioDeviceNodeCreationStatus::FormatNotSupported) {
    return "FormatNotSupported";
  }
  if (status == AudioDeviceNodeCreationStatus::Success) {
    return "Success";
  }
  if (status == AudioDeviceNodeCreationStatus::UnknownFailure) {
    return "UnknownFailure";
  }
  return "Unknown";
}
// Reports whether the calling thread has access to the dispatcher returned by
// starboard::shared::uwp::GetDispatcher(), i.e. whether it is the UI thread.
bool IsUiThread() {
  return starboard::shared::uwp::GetDispatcher()->HasThreadAccess;
}
// Asks the system to show the microphone privacy settings page
// ("ms-settings:privacy-microphone") so the user can grant access. The work is
// wrapped in |main_thread_task|; a continuation inspects the launch result
// and logs failures, including any Platform::Exception raised by the launch.
//
// NOTE(review): in this copy of the file the statement that launches |uri|,
// the hand-off of |main_thread_task| to the main thread, and the heads of the
// log statements are not present. Confirm against the upstream file before
// relying on the details.
void LaunchMicrophonePermissionsAppAsync() {
// Schedule a task to run on the main thread which will launch a URI to
// request microphone permissions.
auto main_thread_task =
[]() {
auto uri = ref new Uri("ms-settings:privacy-microphone");
// Continuation over the asynchronous launch; |previous_task| carries the
// boolean launch result.
.then([](concurrency::task<bool> previous_task) {
try {
// Retrieving the result is done inside the try block so that a failure
// surfaced as a Platform::Exception is handled below.
bool launched_ok = !!previous_task.get();
SB_LOG_IF(ERROR, !launched_ok);
} catch (Platform::Exception ^ e) {
// Capture both the HRESULT and the message for the log entry.
HRESULT hr = e->HResult;
std::string msg = platformStringToString(e->Message);
<< "Exception while launching permissions app, HRESULT: "
<< hr << ", msg: " << msg;
// Returns the audio capture devices known to the system as a vector of
// DeviceInformation handles. The vector is empty when none are found.
//
// NOTE(review): the enumeration call that initializes |all_devices| and the
// body of the loop are not visible in this copy; presumably the selector
// string is passed to a device enumeration API and every |dev_info| is
// appended to |output|. Confirm against the upstream file.
std::vector<DeviceInformation ^> GetAllMicrophoneDevices() {
std::vector<DeviceInformation ^> output;
// Selector string identifying audio capture devices.
Platform::String ^ audio_str = MediaDevice::GetAudioCaptureSelector();
DeviceInformationCollection ^ all_devices =
for (DeviceInformation ^ dev_info : all_devices) {
return output;
// Creates an AudioGraph configured with the given render |category| and
// quantum size |selection_mode|, blocking (via WaitForResult) until creation
// has finished. Creation is expected to succeed; this is only DCHECKed.
//
// NOTE(review): the argument passed to WaitForResult is not visible in this
// copy of the file.
AudioGraph ^
CreateAudioGraph(AudioRenderCategory category,
QuantumSizeSelectionMode selection_mode) {
AudioGraphSettings ^ settings = ref new AudioGraphSettings(category);
settings->QuantumSizeSelectionMode = selection_mode;
CreateAudioGraphResult ^ result = starboard::shared::uwp::WaitForResult(
SB_DCHECK(result->Status == AudioGraphCreationStatus::Success);
AudioGraph ^ graph = result->Graph;
return graph;
} std::
// GenerateAudioInputNodes: tries to create one device input node on |graph|
// for every entry of |microphone_devices|, using |encoding_properties| and
// the Speech media category. Failures are logged per device. When no node
// could be created, at least one device reported AccessDenied, and the whole
// attempt finished within 250ms, |do_launch_microphone_permissions_app|
// becomes true.
//
// NOTE(review): the statement that waits for |create_microphone_input_task|,
// the statement that appends successful nodes to |output|, several closing
// braces and the body of the final if are not visible in this copy;
// presumably that body calls LaunchMicrophonePermissionsAppAsync().
vector<AudioDeviceInputNode ^> GenerateAudioInputNodes(
const std::vector<DeviceInformation ^>& microphone_devices,
AudioEncodingProperties ^ encoding_properties,
AudioGraph ^ graph) {
std::vector<AudioDeviceInputNode ^> output;
// Used below to measure how long node creation took overall.
SbTime start_time = SbTimeGetMonotonicNow();
bool had_permissions_error = false;
for (DeviceInformation ^ mic : microphone_devices) {
auto create_microphone_input_task = graph->CreateDeviceInputNodeAsync(
MediaCategory::Speech, encoding_properties, mic);
CreateAudioDeviceInputNodeResult ^ deviceInputNodeResult =
auto status = deviceInputNodeResult->Status;
AudioDeviceInputNode ^ input_node = deviceInputNodeResult->DeviceInputNode;
if (status != AudioDeviceNodeCreationStatus::Success) {
SB_LOG(INFO) << "Failed to create microphone with device name \""
<< platformStringToString(mic->Name) << "\" because "
<< ToString(status);
if (status == AudioDeviceNodeCreationStatus::AccessDenied) {
// The user hasn't given cobalt access to the microphone because they
// declined access to the microphone now or previously.
had_permissions_error = true;
SB_LOG(INFO) << "Created a microphone with device \""
<< platformStringToString(mic->Name) << "\"";
input_node->ConsumeInput = true;
input_node->OutgoingGain = kMicGain;
// A total duration above 250ms is taken as a sign that the user was shown,
// and interacted with, a permissions prompt.
SbTime delta_time = SbTimeGetMonotonicNow() - start_time;
const bool had_ui_interaction = delta_time > (kSbTimeMillisecond * 250);
// We only care to retry permissions if there were
// 1. No microphones that could be opened.
// 2. There are 1 or more microphones that had errors.
// 3. There was no UI interaction, which is detected if the audio
// node creation completed really quickly. A quick action suggests
// that there was no user interaction and therefore we are in a
// permissions "cooldown" period. These typically last for 30 minutes
// and the work around requires an explicit permissions request.
const bool do_launch_microphone_permissions_app =
output.empty() && had_permissions_error && !had_ui_interaction;
if (do_launch_microphone_permissions_app) {
return output;
// Reinterprets underlying buffer type to match destination vector.
//
// Pulls the current frame from |node|, obtains raw byte access to its buffer
// and appends the bytes, viewed as floats, to the end of |destination|.
// Nothing is appended when the reported capacity is 0.
//
// NOTE(review): the initializers of |audio_buffer| and
// |memory_buffer_reference| and the remainder of the statement that fills
// |memory_byte_access| are not visible in this copy. The visible code does
// not check |hr| after either call; confirm against the upstream file.
void ExtractRawAudioData(AudioFrameOutputNode ^ node,
std::vector<float>* destination) {
AudioFrame ^ audio_frame = node->GetFrame();
AudioBuffer ^ audio_buffer =
IMemoryBufferReference ^ memory_buffer_reference =
ComPtr<IMemoryBufferByteAccess> memory_byte_access;
HRESULT hr = reinterpret_cast<IInspectable*>(memory_buffer_reference)
// GetBuffer reports the start of the buffer and its size in bytes.
BYTE* data = nullptr;
UINT32 capacity = 0;
hr = memory_byte_access->GetBuffer(&data, &capacity);
// Audio data is float data, so the buffer must be a multiple of 4.
SB_DCHECK(capacity % sizeof(float) == 0);
if (capacity > 0) {
// View the byte buffer as floats and append all of them.
float* typed_data = reinterpret_cast<float*>(data);
const size_t typed_data_size = capacity / sizeof(float);
destination->insert(destination->end(), typed_data,
typed_data + typed_data_size);
// Timer useful for detecting that the microphone has been muted for a certain
// amount of time.
class MutedTrigger {
void SignalMuted() {
if (state_ == kIsMuted) {
state_ = kIsMuted;
time_start_ = SbTimeGetMonotonicNow();
void SignalSound() { state_ = kFoundSound; }
bool IsMuted(SbTimeMonotonic duration_threshold) const {
if (state_ != kIsMuted) {
return false;
SbTimeMonotonic duration = SbTimeGetMonotonicNow() - time_start_;
return duration > duration_threshold;
enum State { kInitialized, kIsMuted, kFoundSound };
State state_ = kInitialized;
SbTimeMonotonic time_start_ = 0;
// MicrophoneProcessor encapsulates Microsoft's audio api. All available
// microphones are queried and instantiated. This class will mix the audio
// together into one signed 16-bit pcm stream.
// When the microphone is created it will find all available microphones and
// immediately start recording. A callback will be created which will process
// audio data when new samples are available. The Microphone will stop
// recording when Close() is called.
//
// NOTE(review): this copy of the class is missing a number of lines (access
// specifiers, closing braces and several statements). The comments added
// below describe only what is visible here; items marked NOTE(review) should
// be confirmed against the upstream file.
class MicrophoneProcessor : public starboard::Thread {
// This will try and create a microphone. This will fail (return null) if
// there are not available microphones.
// |max_num_samples| and |sample_rate| are forwarded to the constructor.
// NOTE(review): the initializer of |microphone_devices|, the tail of the
// constructor call and the body of the input_nodes_.empty() branch are not
// visible.
static scoped_ptr<MicrophoneProcessor> TryCreateAndStartRecording(
size_t max_num_samples,
int sample_rate) {
scoped_ptr<MicrophoneProcessor> output;
std::vector<DeviceInformation ^> microphone_devices =
if (microphone_devices.empty()) { // Unexpected condition.
return output.Pass();
output.reset(new MicrophoneProcessor(max_num_samples, sample_rate,
if (output->input_nodes_.empty()) {
return output.Pass();
// NOTE(review): the destructor body is not visible in this copy.
virtual ~MicrophoneProcessor() {
// Returns the number of elements that have been written, or -1 if there
// was a read error.
// A read error is reported when |muted_timer_| says the input has been muted
// for longer than kTimeMutedThreshold. Otherwise at most |out_audio_count|
// samples are copied from the front of |pcm_audio_data_| into
// |out_audio_data| and removed from the buffer. Runs under |mutex_|.
int Read(int16_t* out_audio_data, size_t out_audio_count) {
ScopedLock lock(mutex_);
if (muted_timer_.IsMuted(kTimeMutedThreshold)) {
return -1;
// Never hand out more samples than are currently buffered.
out_audio_count = std::min(out_audio_count, pcm_audio_data_.size());
using iter = std::vector<int16_t>::iterator;
iter it_begin = pcm_audio_data_.begin();
iter it_end = pcm_audio_data_.begin() + out_audio_count;
std::copy(it_begin, it_end, out_audio_data);
// NOTE(review): erasing from the front of a vector shifts the remaining
// elements on every read.
pcm_audio_data_.erase(it_begin, it_end);
return static_cast<int>(out_audio_count);
// Builds the audio graph, one input node per usable microphone and, for each
// input node, a frame output node plus a float staging buffer in
// |audio_channel_|. The thread is named "MicrophoneProc".
// NOTE(review): the second argument of CreateAudioGraph, the tail of the
// GenerateAudioInputNodes call and the statements that create and register
// |audio_frame_node| are not visible.
explicit MicrophoneProcessor(
size_t max_num_samples,
int sample_rate,
const std::vector<DeviceInformation ^>& microphone_devices)
: Thread("MicrophoneProc"), max_num_samples_(max_num_samples) {
audio_graph_ = CreateAudioGraph(AudioRenderCategory::Speech,
// NOTE(review): the trailing comment below says "4-byte float" while the
// value passed is 16; confirm which of the two is intended.
wave_encoder_ =
AudioEncodingProperties::CreatePcm(sample_rate, kNumChannels,
16); // 4-byte float.
input_nodes_ = GenerateAudioInputNodes(microphone_devices, wave_encoder_,
for (AudioDeviceInputNode ^ input_node : input_nodes_) {
AudioFrameOutputNode ^ audio_frame_node =
audio_frame_node->ConsumeInput = true;
// One staging buffer of raw float samples per input node.
audio_channel_.emplace_back(new std::vector<float>());
// Update the audio data whenever a new audio sample has been finished.
// Thread entry point; loops until join_called() reports true.
// NOTE(review): the loop body is not visible; presumably it calls Process().
void Run() override {
while (!join_called()) {
// Drains every frame output node into its staging buffer, then mixes the
// samples available on all channels into |pcm_audio_data_| and updates
// |muted_timer_|. Runs under |mutex_|.
// NOTE(review): early returns, the statement that appends the mixed sample,
// the erase calls and the SignalMuted()/SignalSound() calls are not visible.
void Process() {
ScopedLock lock(mutex_);
if (audio_frame_nodes_.empty()) {
for (size_t i = 0; i < audio_frame_nodes_.size(); ++i) {
ExtractRawAudioData(audio_frame_nodes_[i], audio_channel_[i].get());
// Only as many samples as every channel can provide are mixed, capped at
// |max_num_samples_|.
size_t num_elements = max_num_samples_;
for (const auto& audio_datum : audio_channel_) {
num_elements = std::min(audio_datum->size(), num_elements);
if (num_elements == 0) {
// Stays true only if every sample of every channel in this pass is exactly 0.
bool is_muted = true;
// Mix all available audio channels together and convert to output buffer
// format. Detect if audio is muted.
// NOTE(review): |i| is an int compared against a size_t bound.
for (int i = 0; i < num_elements; ++i) {
float mixed_sample = 0.0f;
for (const auto& audio_datum : audio_channel_) {
float sample = (*audio_datum)[i];
if (sample != 0.0) {
is_muted = false;
mixed_sample += sample;
// Trim values from finished pcm_data if the buffer has exceeded its
// allowed size.
if (pcm_audio_data_.size() > max_num_samples_) {
size_t num_delete = pcm_audio_data_.size() - max_num_samples_;
pcm_audio_data_.begin() + num_delete);
if (is_muted) {
} else {
// Trim values from source channels that were just transferred to
// pcm_audio_data.
for (const auto& audio_datum : audio_channel_) {
audio_datum->begin() + num_elements);
// Graph that owns the input and frame output nodes.
AudioGraph ^ audio_graph_ = nullptr;
// Encoding requested for the device input nodes.
AudioEncodingProperties ^ wave_encoder_;
// One entry per microphone that could be opened.
std::vector<AudioDeviceInputNode ^> input_nodes_;
// Indexed in parallel with |audio_channel_| (see Process()).
std::vector<AudioFrameOutputNode ^> audio_frame_nodes_;
// Raw float samples per channel that have not been mixed yet.
std::vector<std::unique_ptr<std::vector<float>>> audio_channel_;
// Mixed 16-bit PCM samples waiting to be handed out by Read().
std::vector<int16_t> pcm_audio_data_;
// Upper bound applied to |pcm_audio_data_| and to a single mixing pass.
size_t max_num_samples_ = 0;
// Tracks how long the mixed input has been silent.
MutedTrigger muted_timer_;
// Held by Read() and Process().
Mutex mutex_;
// Implements the SbMicrophonePrivate interface.
class MicrophoneImpl : public SbMicrophonePrivate {
MicrophoneImpl(int sample_rate, int buffer_size_bytes)
: buffer_size_bytes_(buffer_size_bytes), sample_rate_(sample_rate) {}
~MicrophoneImpl() { Close(); }
bool Open() override {
if (!microphone_) {
if (IsUiThread()) {
SB_LOG(INFO) << "Could not open microphone from UI thread.";
return false;
microphone_ = MicrophoneProcessor::TryCreateAndStartRecording(
buffer_size_bytes_ / kOutputBytesPerSample, sample_rate_);
return microphone_ != nullptr;
bool Close() override {
return true;
int Read(void* out_audio_data, int audio_data_size) override {
if (!microphone_) {
return -1;
int16_t* pcm_buffer = reinterpret_cast<int16*>(out_audio_data);
size_t pcm_buffer_count = audio_data_size / kOutputBytesPerSample;
int n_samples = microphone_->Read(pcm_buffer, pcm_buffer_count);
if (n_samples < 0) {
return -1; // Is error.
} else {
return n_samples * kOutputBytesPerSample;
const int buffer_size_bytes_;
const int sample_rate_;
scoped_ptr<MicrophoneProcessor> microphone_;
// Singleton access is required by the microphone interface as specified by
// nplb.
// The single microphone id used by this implementation; CreateMicrophone()
// DCHECKs that the id it is given equals this value.
const SbMicrophoneId kSingletonId = reinterpret_cast<SbMicrophoneId>(0x1);
// Consulted by CreateMicrophone() to refuse a second instance and by
// DestroyMicrophone() to check that the instance being destroyed is the
// registered one.
starboard::atomic_pointer<MicrophoneImpl*> s_singleton_pointer;
} // namespace.
// Reports the microphones available to the application.
//
// All physical capture devices are exposed as one logical microphone, so the
// result is 0 when no device is present and 1 otherwise. When
// |out_info_array| is non-null and |info_array_size| is at least 1, the first
// entry is filled in; its label lists the names of all physical devices as
// "[name], [name], ...", truncated to fit the label field.
int SbMicrophonePrivate::GetAvailableMicrophones(
    SbMicrophoneInfo* out_info_array,
    int info_array_size) {
  std::vector<DeviceInformation ^> mic_devices = GetAllMicrophoneDevices();
  if (mic_devices.empty()) {
    return 0;
  }
  if (out_info_array && (info_array_size >= 1)) {
    SbMicrophoneInfo info;
    memset(&info, 0, sizeof(info));
    info.id = kSingletonId;
    info.type = kSBMicrophoneAnalogHeadset;
    info.max_sample_rate_hz = kMaxSampleRate;
    info.min_read_size = kMinReadSizeBytes;
    std::stringstream all_mic_names;
    for (size_t i = 0; i < mic_devices.size(); ++i) {
      DeviceInformation ^ mic_dev = mic_devices[i];
      if (i > 0) {
        all_mic_names << ", ";
      }
      all_mic_names << "[" << platformStringToString(mic_dev->Name) << "]";
    }
    // |label| is a fixed-size character array, so sizeof yields its capacity
    // and strlcpy truncates and terminates within it.
    starboard::strlcpy(info.label, all_mic_names.str().c_str(),
                       static_cast<int>(sizeof(info.label)));
    out_info_array[0] = info;
  }
  return 1;
}
// Returns true when |id| is a valid microphone id and |sample_rate_in_hz|
// lies within [kMinSampleRate, kMaxSampleRate], both ends included.
bool SbMicrophonePrivate::IsMicrophoneSampleRateSupported(
    SbMicrophoneId id,
    int sample_rate_in_hz) {
  if (SbMicrophoneIdIsValid(id)) {
    const bool below_range = sample_rate_in_hz < kMinSampleRate;
    const bool above_range = sample_rate_in_hz > kMaxSampleRate;
    return !(below_range || above_range);
  }
  return false;
}
SbMicrophone SbMicrophonePrivate::CreateMicrophone(SbMicrophoneId id,
int sample_rate_in_hz,
int buffer_size_bytes) {
if (!SbMicrophoneIdIsValid(id)) {
return kSbMicrophoneInvalid;
if (sample_rate_in_hz < kMinSampleRate) {
return kSbMicrophoneInvalid;
if (sample_rate_in_hz > kMaxSampleRate) {
return kSbMicrophoneInvalid;
if (buffer_size_bytes <= 0) {
return kSbMicrophoneInvalid;
// Required to conform to nplb test.
if (buffer_size_bytes >= (std::numeric_limits<int>::max() - 1)) {
return kSbMicrophoneInvalid;
// Id will either by 1 or 0. At this time there is only one microphone.
SB_DCHECK(id == kSingletonId);
if (s_singleton_pointer.load()) {
return kSbMicrophoneInvalid;
MicrophoneImpl* new_microphone =
new MicrophoneImpl(sample_rate_in_hz, buffer_size_bytes);;
return new_microphone;
void SbMicrophonePrivate::DestroyMicrophone(SbMicrophone microphone) {
SB_DCHECK(microphone == s_singleton_pointer.load());;
delete microphone;