| // Copyright 2017 The Cobalt Authors. All Rights Reserved. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| #include "starboard/shared/starboard/microphone/microphone_internal.h" |
| |
| // Windows headers. |
| #include <MemoryBuffer.h> |
| #include <collection.h> |
| #include <ppltasks.h> |
| |
| // C++ headers. |
| #include <algorithm> |
| #include <deque> |
| #include <memory> |
| #include <sstream> |
| #include <string> |
| #include <vector> |
| |
| #include "starboard/common/atomic.h" |
| #include "starboard/common/log.h" |
| #include "starboard/common/mutex.h" |
| #include "starboard/common/semaphore.h" |
| #include "starboard/common/string.h" |
| #include "starboard/common/thread.h" |
| #include "starboard/common/time.h" |
| #include "starboard/shared/uwp/app_accessors.h" |
| #include "starboard/shared/uwp/application_uwp.h" |
| #include "starboard/shared/uwp/async_utils.h" |
| #include "starboard/shared/win32/error_utils.h" |
| #include "starboard/shared/win32/wchar_utils.h" |
| |
| using concurrency::task_continuation_context; |
| using Microsoft::WRL::ComPtr; |
| using starboard::Mutex; |
| using starboard::scoped_ptr; |
| using starboard::ScopedLock; |
| using starboard::Semaphore; |
| using starboard::shared::uwp::ApplicationUwp; |
| using starboard::shared::win32::platformStringToString; |
| using Windows::Devices::Enumeration::DeviceInformation; |
| using Windows::Devices::Enumeration::DeviceInformationCollection; |
| using Windows::Foundation::EventRegistrationToken; |
| using Windows::Foundation::IMemoryBufferByteAccess; |
| using Windows::Foundation::IMemoryBufferReference; |
| using Windows::Foundation::TypedEventHandler; |
| using Windows::Foundation::Uri; |
| using Windows::Media::AudioBuffer; |
| using Windows::Media::AudioBufferAccessMode; |
| using Windows::Media::AudioFrame; |
| using Windows::Media::Audio::AudioDeviceInputNode; |
| using Windows::Media::Audio::AudioDeviceNodeCreationStatus; |
| using Windows::Media::Audio::AudioFrameOutputNode; |
| using Windows::Media::Audio::AudioGraph; |
| using Windows::Media::Audio::AudioGraphCreationStatus; |
| using Windows::Media::Audio::AudioGraphSettings; |
| using Windows::Media::Audio::CreateAudioDeviceInputNodeResult; |
| using Windows::Media::Audio::CreateAudioGraphResult; |
| using Windows::Media::Audio::QuantumSizeSelectionMode; |
| using Windows::Media::Capture::MediaCategory; |
| using Windows::Media::Devices::MediaDevice; |
| using Windows::Media::MediaProperties::AudioEncodingProperties; |
| using Windows::Media::Render::AudioRenderCategory; |
| using Windows::System::Launcher; |
| |
| namespace { |
| |
| // It appears that cobalt will only request 16khz. |
| const int kMinSampleRate = 16000; |
| const int kMaxSampleRate = 44100; |
| const int kNumChannels = 1; |
| const int kOutputBytesPerSample = sizeof(int16_t); |
| const int kMinReadSizeBytes = 4096; |
| const int kMicGain = 1; |
| |
| // Controls the amount of time that a microphone will record muted audio |
| // before it signals a read error. Without this trigger, the app |
| // will continuously wait for audio data. This happens with the Kinect |
| // device, which when disconnected will still record 0-value samples. |
| const int64_t kTimeMutedThresholdUsec = 3'000'000; // 3 seconds. |
| |
| // Maps [-1.0f, 1.0f] -> [-32768, 32767] |
| // Values outside of [-1.0f, 1.0] are clamped. |
| int16_t To16BitPcm(float val) { |
| static const float kMaxFloatValue = std::numeric_limits<int16_t>::max(); |
| static const float kLowFloatValue = std::numeric_limits<int16_t>::lowest(); |
| if (val == 0.0f) { |
| return 0; |
| } else if (val > 0.0f) { |
| if (val > 1.0f) { |
| val = 1.0; |
| } |
| return static_cast<int16_t>(val * kMaxFloatValue); |
| } else { |
| if (val < -1.0f) { |
| val = -1.0; |
| } |
| return static_cast<int16_t>(-1.0f * val * kLowFloatValue); |
| } |
| } |
| |
| const char* ToString(AudioDeviceNodeCreationStatus status) { |
| switch (status) { |
| case AudioDeviceNodeCreationStatus::AccessDenied: |
| return "AccessDenied"; |
| case AudioDeviceNodeCreationStatus::DeviceNotAvailable: |
| return "DeviceNotAvailable"; |
| case AudioDeviceNodeCreationStatus::FormatNotSupported: |
| return "FormatNotSupported"; |
| case AudioDeviceNodeCreationStatus::Success: |
| return "Success"; |
| case AudioDeviceNodeCreationStatus::UnknownFailure: |
| return "UnknownFailure"; |
| } |
| return "Unknown"; |
| } |
| |
| bool IsUiThread() { |
| auto dispatcher = starboard::shared::uwp::GetDispatcher(); |
| // Is UI thread. |
| return dispatcher->HasThreadAccess; |
| } |
| |
| void LaunchMicrophonePermissionsAppAsync() { |
| // Schedule a task to run on the main thread which will launch a URI to |
| // request microphone permissions. |
| auto main_thread_task = |
| []() { |
| auto uri = ref new Uri("ms-settings:privacy-microphone"); |
| |
| concurrency::create_task(Launcher::LaunchUriAsync(uri)) |
| .then([](concurrency::task<bool> previous_task) { |
| try { |
| bool launched_ok = !!previous_task.get(); |
| SB_LOG_IF(ERROR, !launched_ok); |
| } catch (Platform::Exception ^ e) { |
| HRESULT hr = e->HResult; |
| std::string msg = platformStringToString(e->Message); |
| SB_LOG(ERROR) |
| << "Exception while launching permissions app, HRESULT: " |
| << hr << ", msg: " << msg; |
| } |
| }); |
| }; |
| starboard::shared::uwp::RunInMainThreadAsync(main_thread_task); |
| } |
| |
| std::vector<DeviceInformation ^> GetAllMicrophoneDevices() { |
| std::vector<DeviceInformation ^> output; |
| Platform::String ^ audio_str = MediaDevice::GetAudioCaptureSelector(); |
| DeviceInformationCollection ^ all_devices = |
| starboard::shared::uwp::WaitForResult( |
| DeviceInformation::FindAllAsync(audio_str)); |
| for (DeviceInformation ^ dev_info : all_devices) { |
| output.push_back(dev_info); |
| } |
| |
| return output; |
| } |
| |
| AudioGraph ^ |
| CreateAudioGraph(AudioRenderCategory category, |
| QuantumSizeSelectionMode selection_mode) { |
| AudioGraphSettings ^ settings = ref new AudioGraphSettings(category); |
| settings->QuantumSizeSelectionMode = selection_mode; |
| CreateAudioGraphResult ^ result = starboard::shared::uwp::WaitForResult( |
| AudioGraph::CreateAsync(settings)); |
| SB_DCHECK(result->Status == AudioGraphCreationStatus::Success); |
| AudioGraph ^ graph = result->Graph; |
| return graph; |
| } std:: |
| vector<AudioDeviceInputNode ^> GenerateAudioInputNodes( |
| const std::vector<DeviceInformation ^>& microphone_devices, |
| AudioEncodingProperties ^ encoding_properties, |
| AudioGraph ^ graph) { |
| std::vector<AudioDeviceInputNode ^> output; |
| |
| int64_t start_time = starboard::CurrentMonotonicTime(); |
| |
| bool had_permissions_error = false; |
| for (DeviceInformation ^ mic : microphone_devices) { |
| auto create_microphone_input_task = graph->CreateDeviceInputNodeAsync( |
| MediaCategory::Speech, encoding_properties, mic); |
| CreateAudioDeviceInputNodeResult ^ deviceInputNodeResult = |
| starboard::shared::uwp::WaitForResult(create_microphone_input_task); |
| |
| auto status = deviceInputNodeResult->Status; |
| AudioDeviceInputNode ^ input_node = deviceInputNodeResult->DeviceInputNode; |
| |
| if (status != AudioDeviceNodeCreationStatus::Success) { |
| SB_LOG(INFO) << "Failed to create microphone with device name \"" |
| << platformStringToString(mic->Name) << "\" because " |
| << ToString(status); |
| if (status == AudioDeviceNodeCreationStatus::AccessDenied) { |
| // The user hasn't given cobalt access to the microphone because they |
| // declined access to the microphone now or previously. |
| had_permissions_error = true; |
| } |
| continue; |
| } |
| SB_LOG(INFO) << "Created a microphone with device \"" |
| << platformStringToString(mic->Name) << "\""; |
| input_node->ConsumeInput = true; |
| input_node->OutgoingGain = kMicGain; |
| output.push_back(input_node); |
| } |
| |
| int64_t delta_time = starboard::CurrentMonotonicTime() - start_time; |
| const bool had_ui_interaction = delta_time > 250'000; |
| |
| // We only care to retry permissions if there were |
| // 1. No microphones that could be opened. |
| // 2. There are 1 or more microphones that had errors. |
| // 3. There was no UI interaction, which is detected if the audio |
| // node creation completed really quickly. A quick action suggests |
| // that there was no user interaction and therefore we are in a |
| // permissions "cooldown" period. These typically last for 30 minutes |
| // and the work around requires an explicit permissions request. |
| const bool do_launch_microphone_permissions_app = |
| output.empty() && had_permissions_error && !had_ui_interaction; |
| |
| if (do_launch_microphone_permissions_app) { |
| LaunchMicrophonePermissionsAppAsync(); |
| } |
| return output; |
| } |
| |
| // Reinterprets underlying buffer type to match destination vector. |
| void ExtractRawAudioData(AudioFrameOutputNode ^ node, |
| std::vector<float>* destination) { |
| AudioFrame ^ audio_frame = node->GetFrame(); |
| AudioBuffer ^ audio_buffer = |
| audio_frame->LockBuffer(AudioBufferAccessMode::Read); |
| IMemoryBufferReference ^ memory_buffer_reference = |
| audio_buffer->CreateReference(); |
| |
| ComPtr<IMemoryBufferByteAccess> memory_byte_access; |
| HRESULT hr = reinterpret_cast<IInspectable*>(memory_buffer_reference) |
| ->QueryInterface(IID_PPV_ARGS(&memory_byte_access)); |
| CheckResult(hr); |
| |
| BYTE* data = nullptr; |
| UINT32 capacity = 0; |
| hr = memory_byte_access->GetBuffer(&data, &capacity); |
| CheckResult(hr); |
| |
| // Audio data is float data, so the buffer must be a multiple of 4. |
| SB_DCHECK(capacity % sizeof(float) == 0); |
| |
| if (capacity > 0) { |
| float* typed_data = reinterpret_cast<float*>(data); |
| const size_t typed_data_size = capacity / sizeof(float); |
| destination->insert(destination->end(), typed_data, |
| typed_data + typed_data_size); |
| } |
| } |
| |
| // Timer useful for detecting that the microphone has been muted for a certain |
| // amount of time. |
| class MutedTrigger { |
| public: |
| void SignalMuted() { |
| if (state_ == kIsMuted) { |
| return; |
| } |
| state_ = kIsMuted; |
| time_start_ = starboard::CurrentMonotonicTime(); |
| } |
| |
| void SignalSound() { state_ = kFoundSound; } |
| |
| bool IsMuted(int64_t duration_threshold) const { |
| if (state_ != kIsMuted) { |
| return false; |
| } |
| int64_t duration = starboard::CurrentMonotonicTime() - time_start_; |
| return duration > duration_threshold; |
| } |
| |
| private: |
| enum State { kInitialized, kIsMuted, kFoundSound }; |
| State state_ = kInitialized; |
| int64_t time_start_ = 0; |
| }; |
| |
| // MicrophoneProcessor encapsulates Microsoft's audio api. All available |
| // microphones are queried and instantiated. This class will mix the audio |
| // together into one signed 16-bit pcm stream. |
| // |
| // When the microphone is created it will find all available microphones and |
| // immediately start recording. A callback will be created which will process |
| // audio data when new samples are available. The Microphone will stop |
| // recording when Close() is called. |
| class MicrophoneProcessor : public starboard::Thread { |
| public: |
| // This will try and create a microphone. This will fail (return null) if |
| // there are not available microphones. |
| static scoped_ptr<MicrophoneProcessor> TryCreateAndStartRecording( |
| size_t max_num_samples, |
| int sample_rate) { |
| scoped_ptr<MicrophoneProcessor> output; |
| |
| std::vector<DeviceInformation ^> microphone_devices = |
| GetAllMicrophoneDevices(); |
| if (microphone_devices.empty()) { // Unexpected condition. |
| return output.Pass(); |
| } |
| |
| output.reset(new MicrophoneProcessor(max_num_samples, sample_rate, |
| microphone_devices)); |
| |
| if (output->input_nodes_.empty()) { |
| output.reset(nullptr); |
| } |
| return output.Pass(); |
| } |
| |
| virtual ~MicrophoneProcessor() { |
| Thread::Join(); |
| audio_graph_->Stop(); |
| } |
| |
| // Returns the number of elements that have been written, or -1 if there |
| // was a read error. |
| int Read(int16_t* out_audio_data, size_t out_audio_count) { |
| ScopedLock lock(mutex_); |
| if (muted_timer_.IsMuted(kTimeMutedThresholdUsec)) { |
| return -1; |
| } |
| |
| out_audio_count = std::min(out_audio_count, pcm_audio_data_.size()); |
| using iter = std::vector<int16_t>::iterator; |
| iter it_begin = pcm_audio_data_.begin(); |
| iter it_end = pcm_audio_data_.begin() + out_audio_count; |
| std::copy(it_begin, it_end, out_audio_data); |
| pcm_audio_data_.erase(it_begin, it_end); |
| return static_cast<int>(out_audio_count); |
| } |
| |
| private: |
| explicit MicrophoneProcessor( |
| size_t max_num_samples, |
| int sample_rate, |
| const std::vector<DeviceInformation ^>& microphone_devices) |
| : Thread("MicrophoneProc"), max_num_samples_(max_num_samples) { |
| audio_graph_ = CreateAudioGraph(AudioRenderCategory::Speech, |
| QuantumSizeSelectionMode::SystemDefault); |
| wave_encoder_ = |
| AudioEncodingProperties::CreatePcm(sample_rate, kNumChannels, |
| 16); // 4-byte float. |
| SB_DCHECK(audio_graph_); |
| input_nodes_ = GenerateAudioInputNodes(microphone_devices, wave_encoder_, |
| audio_graph_); |
| for (AudioDeviceInputNode ^ input_node : input_nodes_) { |
| AudioFrameOutputNode ^ audio_frame_node = |
| audio_graph_->CreateFrameOutputNode(wave_encoder_); |
| audio_frame_node->ConsumeInput = true; |
| input_node->AddOutgoingConnection(audio_frame_node); |
| audio_channel_.emplace_back(new std::vector<float>()); |
| audio_frame_nodes_.push_back(audio_frame_node); |
| } |
| // Update the audio data whenever a new audio sample has been finished. |
| audio_graph_->Start(); |
| Thread::Start(); |
| } |
| |
| void Run() override { |
| while (!join_called()) { |
| SleepMilliseconds(1); |
| Process(); |
| } |
| } |
| |
| void Process() { |
| ScopedLock lock(mutex_); |
| if (audio_frame_nodes_.empty()) { |
| return; |
| } |
| for (size_t i = 0; i < audio_frame_nodes_.size(); ++i) { |
| ExtractRawAudioData(audio_frame_nodes_[i], audio_channel_[i].get()); |
| } |
| |
| size_t num_elements = max_num_samples_; |
| for (const auto& audio_datum : audio_channel_) { |
| num_elements = std::min(audio_datum->size(), num_elements); |
| } |
| if (num_elements == 0) { |
| return; |
| } |
| |
| bool is_muted = true; |
| // Mix all available audio channels together and convert to output buffer |
| // format. Detect if audio is muted. |
| for (int i = 0; i < num_elements; ++i) { |
| float mixed_sample = 0.0f; |
| for (const auto& audio_datum : audio_channel_) { |
| float sample = (*audio_datum)[i]; |
| if (sample != 0.0) { |
| is_muted = false; |
| } |
| mixed_sample += sample; |
| } |
| pcm_audio_data_.push_back(To16BitPcm(mixed_sample)); |
| } |
| |
| // Trim values from finished pcm_data if the buffer has exceeded it's |
| // allowed size. |
| if (pcm_audio_data_.size() > max_num_samples_) { |
| size_t num_delete = pcm_audio_data_.size() - max_num_samples_; |
| pcm_audio_data_.erase(pcm_audio_data_.begin(), |
| pcm_audio_data_.begin() + num_delete); |
| } |
| |
| if (is_muted) { |
| muted_timer_.SignalMuted(); |
| } else { |
| muted_timer_.SignalSound(); |
| } |
| // Trim values from source channels that were just transferred to |
| // pcm_audio_data. |
| for (const auto& audio_datum : audio_channel_) { |
| audio_datum->erase(audio_datum->begin(), |
| audio_datum->begin() + num_elements); |
| } |
| } |
| |
| AudioGraph ^ audio_graph_ = nullptr; |
| AudioEncodingProperties ^ wave_encoder_; |
| std::vector<AudioDeviceInputNode ^> input_nodes_; |
| std::vector<AudioFrameOutputNode ^> audio_frame_nodes_; |
| std::vector<std::unique_ptr<std::vector<float>>> audio_channel_; |
| std::vector<int16_t> pcm_audio_data_; |
| size_t max_num_samples_ = 0; |
| MutedTrigger muted_timer_; |
| Mutex mutex_; |
| }; |
| |
| // Implements the SbMicrophonePrivate interface. |
| class MicrophoneImpl : public SbMicrophonePrivate { |
| public: |
| MicrophoneImpl(int sample_rate, int buffer_size_bytes) |
| : buffer_size_bytes_(buffer_size_bytes), sample_rate_(sample_rate) {} |
| |
| ~MicrophoneImpl() { Close(); } |
| |
| bool Open() override { |
| if (!microphone_) { |
| if (IsUiThread()) { |
| SB_LOG(INFO) << "Could not open microphone from UI thread."; |
| return false; |
| } |
| microphone_ = MicrophoneProcessor::TryCreateAndStartRecording( |
| buffer_size_bytes_ / kOutputBytesPerSample, sample_rate_); |
| } |
| return microphone_ != nullptr; |
| } |
| |
| bool Close() override { |
| microphone_.reset(nullptr); |
| return true; |
| } |
| |
| int Read(void* out_audio_data, int audio_data_size) override { |
| if (!microphone_) { |
| return -1; |
| } |
| int16_t* pcm_buffer = reinterpret_cast<int16*>(out_audio_data); |
| size_t pcm_buffer_count = audio_data_size / kOutputBytesPerSample; |
| int n_samples = microphone_->Read(pcm_buffer, pcm_buffer_count); |
| if (n_samples < 0) { |
| return -1; // Is error. |
| } else { |
| return n_samples * kOutputBytesPerSample; |
| } |
| } |
| |
| private: |
| const int buffer_size_bytes_; |
| const int sample_rate_; |
| scoped_ptr<MicrophoneProcessor> microphone_; |
| }; |
| |
| // Singleton access is required by the microphone interface as specified by |
| // nplb. |
| const SbMicrophoneId kSingletonId = reinterpret_cast<SbMicrophoneId>(0x1); |
| starboard::atomic_pointer<MicrophoneImpl*> s_singleton_pointer; |
| |
| } // namespace. |
| |
| int SbMicrophonePrivate::GetAvailableMicrophones( |
| SbMicrophoneInfo* out_info_array, |
| int info_array_size) { |
| std::vector<DeviceInformation ^> mic_devices = GetAllMicrophoneDevices(); |
| if (mic_devices.empty()) { |
| return 0; |
| } |
| if (out_info_array && (info_array_size >= 1)) { |
| SbMicrophoneInfo info; |
| memset(&info, 0, sizeof(info)); |
| info.id = kSingletonId; |
| info.type = kSBMicrophoneAnalogHeadset; |
| info.max_sample_rate_hz = kMaxSampleRate; |
| info.min_read_size = kMinReadSizeBytes; |
| |
| std::stringstream all_mic_names; |
| for (size_t i = 0; i < mic_devices.size(); ++i) { |
| DeviceInformation ^ mic_dev = mic_devices[i]; |
| if (i > 0) { |
| all_mic_names << ", "; |
| } |
| all_mic_names << "[" << platformStringToString(mic_dev->Name) << "]"; |
| } |
| starboard::strlcpy(info.label, all_mic_names.str().c_str(), |
| SB_ARRAY_SIZE(info.label)); |
| out_info_array[0] = info; |
| } |
| return 1; |
| } |
| |
| bool SbMicrophonePrivate::IsMicrophoneSampleRateSupported( |
| SbMicrophoneId id, |
| int sample_rate_in_hz) { |
| if (!SbMicrophoneIdIsValid(id)) { |
| return false; |
| } |
| return (kMinSampleRate <= sample_rate_in_hz) && |
| (sample_rate_in_hz <= kMaxSampleRate); |
| } |
| |
| SbMicrophone SbMicrophonePrivate::CreateMicrophone(SbMicrophoneId id, |
| int sample_rate_in_hz, |
| int buffer_size_bytes) { |
| if (!SbMicrophoneIdIsValid(id)) { |
| return kSbMicrophoneInvalid; |
| } |
| if (sample_rate_in_hz < kMinSampleRate) { |
| return kSbMicrophoneInvalid; |
| } |
| if (sample_rate_in_hz > kMaxSampleRate) { |
| return kSbMicrophoneInvalid; |
| } |
| if (buffer_size_bytes <= 0) { |
| return kSbMicrophoneInvalid; |
| } |
| // Required to conform to nplb test. |
| if (buffer_size_bytes >= (std::numeric_limits<int>::max() - 1)) { |
| return kSbMicrophoneInvalid; |
| } |
| // Id will either by 1 or 0. At this time there is only one microphone. |
| SB_DCHECK(id == kSingletonId); |
| if (s_singleton_pointer.load()) { |
| return kSbMicrophoneInvalid; |
| } |
| MicrophoneImpl* new_microphone = |
| new MicrophoneImpl(sample_rate_in_hz, buffer_size_bytes); |
| |
| s_singleton_pointer.store(new_microphone); |
| return new_microphone; |
| } |
| |
| void SbMicrophonePrivate::DestroyMicrophone(SbMicrophone microphone) { |
| SB_DCHECK(microphone == s_singleton_pointer.load()); |
| s_singleton_pointer.store(nullptr); |
| delete microphone; |
| } |