// Copyright 2012 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "media/audio/win/audio_low_latency_input_win.h"
#include <objbase.h>
#include <propkey.h>
#include <windows.devices.enumeration.h>
#include <windows.media.devices.h>
#include <algorithm>
#include <cmath>
#include <memory>
#include <utility>
#include "base/logging.h"
#include "base/metrics/histogram_functions.h"
#include "base/metrics/histogram_macros.h"
#include "base/strings/stringprintf.h"
#include "base/strings/utf_string_conversions.h"
#include "base/trace_event/common/trace_event_common.h"
#include "base/trace_event/trace_event.h"
#include "base/win/core_winrt_util.h"
#include "base/win/scoped_propvariant.h"
#include "base/win/scoped_variant.h"
#include "base/win/vector.h"
#include "base/win/windows_version.h"
#include "media/audio/audio_device_description.h"
#include "media/audio/audio_features.h"
#include "media/audio/win/avrt_wrapper_win.h"
#include "media/audio/win/core_audio_util_win.h"
#include "media/base/audio_block_fifo.h"
#include "media/base/audio_bus.h"
#include "media/base/audio_timestamp_helper.h"
#include "media/base/channel_layout.h"
#include "media/base/limits.h"
#include "media/base/media_switches.h"
#include "media/base/timestamp_constants.h"
using ABI::Windows::Foundation::Collections::IVectorView;
using ABI::Windows::Media::Devices::IMediaDeviceStatics;
using ABI::Windows::Media::Effects::IAudioCaptureEffectsManager;
using ABI::Windows::Media::Effects::IAudioEffectsManagerStatics;
using base::win::GetActivationFactory;
using base::win::ScopedCoMem;
using base::win::ScopedCOMInitializer;
using base::win::ScopedHString;
using Microsoft::WRL::ComPtr;
namespace media {
namespace {
constexpr char kUwpDeviceIdPrefix[] = "\\\\?\\SWD#MMDEVAPI#";
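// Note: with the C++ escaping removed, the prefix above is the literal string
// "\\?\SWD#MMDEVAPI#" that UWP-style audio device IDs start with.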
constexpr uint32_t KSAUDIO_SPEAKER_UNSUPPORTED = 0;
// Converts a COM error into a human-readable string.
std::string ErrorToString(HRESULT hresult) {
return CoreAudioUtil::ErrorToString(hresult);
}
// Errors when initializing the audio client related to the audio format. Split
// by whether we're using format conversion or not. Used for reporting stats -
// do not renumber entries.
enum FormatRelatedInitError {
kUnsupportedFormat = 0,
kUnsupportedFormatWithFormatConversion = 1,
kInvalidArgument = 2,
kInvalidArgumentWithFormatConversion = 3,
kCount
};
bool IsSupportedFormatForConversion(WAVEFORMATEXTENSIBLE* format_ex) {
WAVEFORMATEX* format = &format_ex->Format;
if (format->nSamplesPerSec < limits::kMinSampleRate ||
format->nSamplesPerSec > limits::kMaxSampleRate) {
return false;
}
switch (format->wBitsPerSample) {
case 8:
case 16:
case 32:
break;
default:
return false;
}
if (GuessChannelLayout(format->nChannels) == CHANNEL_LAYOUT_UNSUPPORTED) {
LOG(ERROR) << "Hardware configuration not supported for audio conversion";
return false;
}
return true;
}
// Converts a ChannelLayout to Microsoft's channel configuration, but only
// discrete, mono and stereo layouts are supported currently. All other
// multi-channel layouts return KSAUDIO_SPEAKER_UNSUPPORTED.
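// For reference, the KSAUDIO_SPEAKER_* values are bitmasks of speaker
// positions; e.g. KSAUDIO_SPEAKER_STEREO is
// (SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT).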
ChannelConfig ChannelLayoutToChannelConfig(ChannelLayout layout) {
switch (layout) {
case CHANNEL_LAYOUT_DISCRETE:
return KSAUDIO_SPEAKER_DIRECTOUT;
case CHANNEL_LAYOUT_MONO:
return KSAUDIO_SPEAKER_MONO;
case CHANNEL_LAYOUT_STEREO:
return KSAUDIO_SPEAKER_STEREO;
default:
LOG(WARNING) << "Unsupported channel layout: " << layout;
// KSAUDIO_SPEAKER_UNSUPPORTED equals 0 and corresponds to "no specific
// channel order".
return KSAUDIO_SPEAKER_UNSUPPORTED;
}
}
const char* StreamOpenResultToString(
WASAPIAudioInputStream::StreamOpenResult result) {
switch (result) {
case WASAPIAudioInputStream::OPEN_RESULT_OK:
return "OK";
case WASAPIAudioInputStream::OPEN_RESULT_CREATE_INSTANCE:
return "CREATE_INSTANCE";
case WASAPIAudioInputStream::OPEN_RESULT_NO_ENDPOINT:
return "NO_ENDPOINT";
case WASAPIAudioInputStream::OPEN_RESULT_NO_STATE:
return "NO_STATE";
case WASAPIAudioInputStream::OPEN_RESULT_DEVICE_NOT_ACTIVE:
return "DEVICE_NOT_ACTIVE";
case WASAPIAudioInputStream::OPEN_RESULT_ACTIVATION_FAILED:
return "ACTIVATION_FAILED";
case WASAPIAudioInputStream::OPEN_RESULT_FORMAT_NOT_SUPPORTED:
return "FORMAT_NOT_SUPPORTED";
case WASAPIAudioInputStream::OPEN_RESULT_AUDIO_CLIENT_INIT_FAILED:
return "AUDIO_CLIENT_INIT_FAILED";
case WASAPIAudioInputStream::OPEN_RESULT_GET_BUFFER_SIZE_FAILED:
return "GET_BUFFER_SIZE_FAILED";
case WASAPIAudioInputStream::OPEN_RESULT_LOOPBACK_ACTIVATE_FAILED:
return "LOOPBACK_ACTIVATE_FAILED";
case WASAPIAudioInputStream::OPEN_RESULT_LOOPBACK_INIT_FAILED:
return "LOOPBACK_INIT_FAILED";
case WASAPIAudioInputStream::OPEN_RESULT_SET_EVENT_HANDLE:
return "SET_EVENT_HANDLE";
case WASAPIAudioInputStream::OPEN_RESULT_NO_CAPTURE_CLIENT:
return "NO_CAPTURE_CLIENT";
case WASAPIAudioInputStream::OPEN_RESULT_NO_AUDIO_VOLUME:
return "NO_AUDIO_VOLUME";
case WASAPIAudioInputStream::OPEN_RESULT_OK_WITH_RESAMPLING:
return "OK_WITH_RESAMPLING";
}
return "UNKNOWN";
}
const char* EffectTypeToString(
ABI::Windows::Media::Effects::AudioEffectType type) {
switch (type) {
case ABI::Windows::Media::Effects::AudioEffectType_Other:
return "Other/None";
case ABI::Windows::Media::Effects::AudioEffectType_AcousticEchoCancellation:
return "AcousticEchoCancellation";
case ABI::Windows::Media::Effects::AudioEffectType_NoiseSuppression:
return "NoiseSuppression";
case ABI::Windows::Media::Effects::AudioEffectType_AutomaticGainControl:
return "AutomaticGainControl";
case ABI::Windows::Media::Effects::AudioEffectType_BeamForming:
return "BeamForming";
case ABI::Windows::Media::Effects::AudioEffectType_ConstantToneRemoval:
return "ConstantToneRemoval";
case ABI::Windows::Media::Effects::AudioEffectType_Equalizer:
return "Equalizer";
case ABI::Windows::Media::Effects::AudioEffectType_LoudnessEqualizer:
return "LoudnessEqualizer";
case ABI::Windows::Media::Effects::AudioEffectType_BassBoost:
return "BassBoost";
case ABI::Windows::Media::Effects::AudioEffectType_VirtualSurround:
return "VirtualSurround";
case ABI::Windows::Media::Effects::AudioEffectType_VirtualHeadphones:
return "VirtualHeadphones";
case ABI::Windows::Media::Effects::AudioEffectType_SpeakerFill:
return "SpeakerFill";
case ABI::Windows::Media::Effects::AudioEffectType_RoomCorrection:
return "RoomCorrection";
case ABI::Windows::Media::Effects::AudioEffectType_BassManagement:
return "BassManagement";
case ABI::Windows::Media::Effects::AudioEffectType_EnvironmentalEffects:
return "EnvironmentalEffects";
case ABI::Windows::Media::Effects::AudioEffectType_SpeakerProtection:
return "SpeakerProtection";
case ABI::Windows::Media::Effects::AudioEffectType_SpeakerCompensation:
return "SpeakerCompensation";
case ABI::Windows::Media::Effects::AudioEffectType_DynamicRangeCompression:
return "DynamicRangeCompression";
case ABI::Windows::Media::Effects::AudioEffectType_FarFieldBeamForming:
return "FarFieldBeamForming";
case ABI::Windows::Media::Effects::AudioEffectType_DeepNoiseSuppression:
return "DeepNoiseSuppression";
}
return "Unknown";
}
bool VariantBoolToBool(VARIANT_BOOL var_bool) {
switch (var_bool) {
case VARIANT_TRUE:
return true;
case VARIANT_FALSE:
return false;
}
LOG(ERROR) << "Invalid VARIANT_BOOL type";
return false;
}
std::string GetOpenLogString(WASAPIAudioInputStream::StreamOpenResult result,
HRESULT hr,
WAVEFORMATEXTENSIBLE input_format,
WAVEFORMATEX output_format) {
return base::StringPrintf(
"WAIS::Open => (ERROR: result=%s, hresult=%#lx, input_format=[%s], "
"output_format=[%s])",
StreamOpenResultToString(result), hr,
CoreAudioUtil::WaveFormatToString(&input_format).c_str(),
CoreAudioUtil::WaveFormatToString(&output_format).c_str());
}
bool InitializeUWPSupport() {
// Place the actual body of the initialization in a lambda and store the
// result as a static since we don't expect this result to change between
// runs.
static const bool initialization_result = []() {
// Windows.Media.Effects and Windows.Media.Devices require Windows 10 build
// 10.0.10240.0.
DCHECK_GE(base::win::OSInfo::GetInstance()->version_number().build, 10240u);
return true;
}();
return initialization_result;
}
} // namespace
// Counts how often an OS capture callback reports a data discontinuity and logs
// it as a UMA histogram.
class WASAPIAudioInputStream::DataDiscontinuityReporter {
public:
// Logs once every 10s, assuming 10ms buffers.
constexpr static int kCallbacksPerLogPeriod = 1000;
DataDiscontinuityReporter() {}
int GetLongTermDiscontinuityCountAndReset() {
int long_term_count = data_discontinuity_long_term_count_;
callback_count_ = 0;
data_discontinuity_short_term_count_ = 0;
data_discontinuity_long_term_count_ = 0;
return long_term_count;
}
void Log(bool observed_data_discontinuity) {
++callback_count_;
if (observed_data_discontinuity) {
++data_discontinuity_short_term_count_;
++data_discontinuity_long_term_count_;
}
if (callback_count_ % kCallbacksPerLogPeriod)
return;
// TODO(https://crbug.com/825744): It might be possible to replace
// "Media.Audio.Capture.Glitches2" with this new (simplified) metric
// instead.
base::UmaHistogramCounts1000("Media.Audio.Capture.Win.Glitches2",
data_discontinuity_short_term_count_);
data_discontinuity_short_term_count_ = 0;
}
private:
int callback_count_ = 0;
int data_discontinuity_short_term_count_ = 0;
int data_discontinuity_long_term_count_ = 0;
};
WASAPIAudioInputStream::WASAPIAudioInputStream(
AudioManagerWin* manager,
const AudioParameters& params,
const std::string& device_id,
AudioManager::LogCallback log_callback)
: manager_(manager),
glitch_reporter_(SystemGlitchReporter::StreamType::kCapture),
peak_detector_(base::BindRepeating(&AudioManager::TraceAmplitudePeak,
base::Unretained(manager_),
/*trace_start=*/true)),
data_discontinuity_reporter_(
std::make_unique<DataDiscontinuityReporter>()),
device_id_(device_id),
log_callback_(std::move(log_callback)) {
DCHECK(manager_);
DCHECK(!device_id_.empty());
DCHECK(!log_callback_.is_null());
DCHECK_LE(params.channels(), 2);
DCHECK(params.channel_layout() == CHANNEL_LAYOUT_MONO ||
params.channel_layout() == CHANNEL_LAYOUT_STEREO ||
params.channel_layout() == CHANNEL_LAYOUT_DISCRETE);
SendLogMessage("%s({device_id=%s}, {params=[%s]})", __func__,
device_id.c_str(), params.AsHumanReadableString().c_str());
// Load the Avrt DLL if not already loaded. Required to support MMCSS.
bool avrt_init = avrt::Initialize();
if (!avrt_init)
SendLogMessage("%s => (WARNING: failed to load Avrt.dll)", __func__);
const SampleFormat kSampleFormat = kSampleFormatS16;
// The client asks for an input stream specified by |params|. Start by
// setting up an input device format according to the same specification.
// If all goes well during the upcoming initialization, this format will not
// change. However, under some circumstances, minor changes can be required
// to fit the current input audio device. If so, a FIFO and/or an audio
// converter might be needed to ensure that the output format of this stream
// matches what the client asks for.
WAVEFORMATEX* format = &input_format_.Format;
format->wFormatTag = WAVE_FORMAT_EXTENSIBLE;
format->nChannels = params.channels();
format->nSamplesPerSec = params.sample_rate();
format->wBitsPerSample = SampleFormatToBitsPerChannel(kSampleFormat);
format->nBlockAlign = (format->wBitsPerSample / 8) * format->nChannels;
format->nAvgBytesPerSec = format->nSamplesPerSec * format->nBlockAlign;
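// Worked example (assuming the client asks for 48 kHz stereo with 16-bit
// samples): nBlockAlign = (16 / 8) * 2 = 4 bytes per frame and
// nAvgBytesPerSec = 48000 * 4 = 192000 bytes per second.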
// Add the parts which are unique to WAVE_FORMAT_EXTENSIBLE which can be
// required in combination with e.g. multi-channel microphone arrays.
format->cbSize = sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX);
input_format_.Samples.wValidBitsPerSample = format->wBitsPerSample;
input_format_.dwChannelMask =
ChannelLayoutToChannelConfig(params.channel_layout());
input_format_.SubFormat = KSDATAFORMAT_SUBTYPE_PCM;
SendLogMessage("%s => (audio engine format=[%s])", __func__,
CoreAudioUtil::WaveFormatToString(&input_format_).c_str());
// Set up the fixed output format based on |params|. It will not be changed and
// does not require an extended wave format structure since any multi-channel
// input will be converted to stereo.
output_format_.wFormatTag = WAVE_FORMAT_PCM;
output_format_.nChannels = format->nChannels;
output_format_.nSamplesPerSec = format->nSamplesPerSec;
output_format_.wBitsPerSample = format->wBitsPerSample;
output_format_.nBlockAlign = format->nBlockAlign;
output_format_.nAvgBytesPerSec = format->nAvgBytesPerSec;
output_format_.cbSize = 0;
SendLogMessage("%s => (audio sink format=[%s])", __func__,
CoreAudioUtil::WaveFormatToString(&output_format_).c_str());
// Size in bytes of each audio frame.
frame_size_bytes_ = format->nBlockAlign;
// Store size of audio packets which we expect to get from the audio
// endpoint device in each capture event.
packet_size_bytes_ = params.GetBytesPerBuffer(kSampleFormat);
packet_size_frames_ = packet_size_bytes_ / format->nBlockAlign;
SendLogMessage(
"%s => (packet size=[%zu bytes/%zu audio frames/%.3f milliseconds])",
__func__, packet_size_bytes_, packet_size_frames_,
params.GetBufferDuration().InMillisecondsF());
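// Worked example (assuming 10 ms buffers at 48 kHz stereo S16): the packet
// size is 480 audio frames, i.e. 480 * 4 = 1920 bytes per capture callback.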
// All events are auto-reset events and non-signaled initially.
// Create the event which the audio engine will signal each time
// a buffer becomes ready to be processed by the client.
audio_samples_ready_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));
DCHECK(audio_samples_ready_event_.IsValid());
// Create the event which will be set in Stop() when capturing shall stop.
stop_capture_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));
DCHECK(stop_capture_event_.IsValid());
}
WASAPIAudioInputStream::~WASAPIAudioInputStream() {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
}
AudioInputStream::OpenOutcome WASAPIAudioInputStream::Open() {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
SendLogMessage("%s([opened=%s])", __func__, opened_ ? "true" : "false");
if (opened_) {
return OpenOutcome::kAlreadyOpen;
}
// Obtain a reference to the IMMDevice interface of the capturing device with
// the specified unique identifier or role which was set at construction.
HRESULT hr = SetCaptureDevice();
if (FAILED(hr)) {
ReportOpenResult(hr);
return OpenOutcome::kFailed;
}
// Check if raw audio processing is supported for the selected capture device.
raw_processing_supported_ = RawProcessingSupported();
if (raw_processing_supported_ &&
!AudioDeviceDescription::IsLoopbackDevice(device_id_) &&
InitializeUWPSupport()) {
// Retrieve a unique identifier of the selected audio device but in a
// format which can be used by UWP (or Core WinRT) APIs. It can then be
// utilized in combination with the Windows.Media.Effects UWP API to
// discover the audio processing chain on a device.
std::string uwp_device_id = GetUWPDeviceId();
if (!uwp_device_id.empty()) {
// For the selected device, generate two lists of enabled audio effects
// and store them in |default_effect_types_| and |raw_effect_types_|.
// Default corresponds to "Normal audio signal processing" and Raw is for
// "Minimal audio signal processing". These two lists are used for UMA
// stats when the stream is closed.
GetAudioCaptureEffects(uwp_device_id);
}
}
use_fake_audio_capture_timestamps_ =
base::FeatureList::IsEnabled(media::kUseFakeAudioCaptureTimestamps);
if (use_fake_audio_capture_timestamps_) {
SendLogMessage("%s => (WARNING: capture timestamps will be fake)",
__func__);
}
// Obtain an IAudioClient interface which enables us to create and initialize
// an audio stream between an audio application and the audio engine.
hr = endpoint_device_->Activate(__uuidof(IAudioClient), CLSCTX_ALL, nullptr,
&audio_client_);
if (FAILED(hr)) {
open_result_ = OPEN_RESULT_ACTIVATION_FAILED;
ReportOpenResult(hr);
return OpenOutcome::kFailed;
}
// Raw audio capture suppresses processing that down mixes e.g. a microphone
// array into a supported format and instead exposes the device's native
// format. Chrome only supports a maximum number of input channels given by
// media::kMaxConcurrentChannels. Therefore, one additional test is needed
// before stating that raw audio processing can be supported.
// Failure will not prevent opening but the method must succeed to be able to
// select raw input capture mode.
WORD audio_engine_channels = 0;
hr = GetAudioEngineNumChannels(&audio_engine_channels);
// Attempt to enable communications category and raw capture mode on the audio
// stream. Ignoring return value since the method logs its own error messages
// and it should be OK to continue opening the stream even after a failure.
if (base::FeatureList::IsEnabled(media::kWasapiRawAudioCapture) &&
raw_processing_supported_ &&
!AudioDeviceDescription::IsLoopbackDevice(device_id_) && SUCCEEDED(hr)) {
SetCommunicationsCategoryAndMaybeRawCaptureMode(audio_engine_channels);
}
// Verify that the selected audio endpoint supports the specified format
// set during construction and using the specified client properties.
hr = S_OK;
if (!DesiredFormatIsSupported(&hr)) {
open_result_ = OPEN_RESULT_FORMAT_NOT_SUPPORTED;
ReportOpenResult(hr);
return OpenOutcome::kFailed;
}
// Initialize the audio stream between the client and the device using
// shared mode and a lowest possible glitch-free latency.
hr = InitializeAudioEngine();
if (SUCCEEDED(hr) && converter_)
open_result_ = OPEN_RESULT_OK_WITH_RESAMPLING;
ReportOpenResult(hr); // Report before we assign a value to |opened_|.
opened_ = SUCCEEDED(hr);
if (opened_) {
return OpenOutcome::kSuccess;
}
switch (hr) {
case E_ACCESSDENIED:
return OpenOutcome::kFailedSystemPermissions;
case AUDCLNT_E_DEVICE_IN_USE:
return OpenOutcome::kFailedInUse;
default:
return OpenOutcome::kFailed;
}
}
void WASAPIAudioInputStream::Start(AudioInputCallback* callback) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
DCHECK(callback);
SendLogMessage("%s([opened=%s, started=%s])", __func__,
opened_ ? "true" : "false", started_ ? "true" : "false");
if (!opened_)
return;
if (started_)
return;
// Check if the master volume level of the opened audio session is set to
// zero and store the information for a UMA histogram generated in Stop().
// Valid volume levels are in the range 0.0 to 1.0.
// See http://crbug.com/1014443 for details why this is needed.
if (GetVolume() == 0.0) {
SendLogMessage("%s => (WARNING: Input audio session starts at zero volume)",
__func__);
audio_session_starts_at_zero_volume_ = true;
}
if (device_id_ == AudioDeviceDescription::kLoopbackWithMuteDeviceId &&
system_audio_volume_) {
BOOL muted = false;
system_audio_volume_->GetMute(&muted);
// If the system audio is muted at the time of capturing, then no need to
// mute it again, and later we do not unmute system audio when stopping
// capturing.
if (!muted) {
system_audio_volume_->SetMute(true, nullptr);
mute_done_ = true;
}
}
DCHECK(!sink_);
sink_ = callback;
// Starts periodic AGC microphone measurements if the AGC has been enabled
// using SetAutomaticGainControl().
StartAgc();
// Create and start the thread that will drive the capturing by waiting for
// capture events.
DCHECK(!capture_thread_.get());
capture_thread_ = std::make_unique<base::DelegateSimpleThread>(
this, "wasapi_capture_thread",
base::SimpleThread::Options(base::ThreadType::kRealtimeAudio));
capture_thread_->Start();
// Start streaming data between the endpoint buffer and the audio engine.
HRESULT hr = audio_client_->Start();
if (FAILED(hr)) {
SendLogMessage("%s => (ERROR: IAudioClient::Start=[%s])", __func__,
ErrorToString(hr).c_str());
}
if (SUCCEEDED(hr) && audio_render_client_for_loopback_.Get()) {
hr = audio_render_client_for_loopback_->Start();
if (FAILED(hr))
SendLogMessage("%s => (ERROR: IAudioClient::Start=[%s] (loopback))",
__func__, ErrorToString(hr).c_str());
}
started_ = SUCCEEDED(hr);
}
void WASAPIAudioInputStream::Stop() {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
SendLogMessage("%s([started=%s])", __func__, started_ ? "true" : "false");
if (!started_)
return;
// Only upload UMA histogram for the case when AGC is enabled, i.e., for
// WebRTC based audio input streams.
const bool add_uma_histogram = GetAutomaticGainControl();
// We have muted system audio for capturing, so we need to unmute it when
// capturing stops.
if (device_id_ == AudioDeviceDescription::kLoopbackWithMuteDeviceId &&
mute_done_) {
DCHECK(system_audio_volume_);
if (system_audio_volume_) {
system_audio_volume_->SetMute(false, nullptr);
mute_done_ = false;
}
}
// Stops periodic AGC microphone measurements.
StopAgc();
// Shut down the capture thread.
if (stop_capture_event_.IsValid()) {
SetEvent(stop_capture_event_.Get());
}
// Stop the input audio streaming.
HRESULT hr = audio_client_->Stop();
if (FAILED(hr)) {
SendLogMessage("%s => (ERROR: IAudioClient::Stop=[%s])", __func__,
ErrorToString(hr).c_str());
}
// Wait until the thread completes and perform cleanup.
if (capture_thread_) {
SetEvent(stop_capture_event_.Get());
capture_thread_->Join();
capture_thread_.reset();
}
// Upload UMA histogram to track down possible issue that can lead to a
// "no audio" state. See http://crbug.com/1014443.
if (add_uma_histogram) {
base::UmaHistogramBoolean("Media.Audio.InputVolumeStartsAtZeroWin",
audio_session_starts_at_zero_volume_);
audio_session_starts_at_zero_volume_ = false;
}
SendLogMessage(
"%s => (timestamp(n)-timestamp(n-1)=[min: %.3f msec, max: %.3f msec])",
__func__, min_timestamp_diff_.InMillisecondsF(),
max_timestamp_diff_.InMillisecondsF());
started_ = false;
sink_ = nullptr;
}
void WASAPIAudioInputStream::Close() {
SendLogMessage("%s()", __func__);
// It is valid to call Close() before calling Open() or Start().
// It is also valid to call Close() after Start() has been called.
Stop();
// Only upload UMA histogram for the case when AGC is enabled, i.e., for
// WebRTC based audio input streams.
if (GetAutomaticGainControl()) {
// Upload UMA histogram to track if the capture device supported raw audio
// capture or not. See https://crbug.com/1133643.
base::UmaHistogramBoolean("Media.Audio.RawProcessingSupportedWin",
raw_processing_supported_);
// These UMAs are deprecated but keep adding the information as text logs
// for debugging purposes.
for (auto const& type : default_effect_types_) {
SendLogMessage("%s => (Media.Audio.Capture.Win.DefaultEffectType=%s)",
__func__, EffectTypeToString(type));
}
for (auto const& type : raw_effect_types_) {
SendLogMessage("%s => (Media.Audio.Capture.Win.RawEffectType=%s)",
__func__, EffectTypeToString(type));
}
}
if (converter_)
converter_->RemoveInput(this);
ReportAndResetGlitchStats();
// Inform the audio manager that we have been closed. This will cause our
// destruction.
manager_->ReleaseInputStream(this);
}
double WASAPIAudioInputStream::GetMaxVolume() {
// Verify that Open() has been called successfully, to ensure that an audio
// session exists and that an ISimpleAudioVolume interface has been created.
DLOG_IF(ERROR, !opened_) << "Open() has not been called successfully";
if (!opened_)
return 0.0;
// The effective volume value is always in the range 0.0 to 1.0, hence
// we can return a fixed value (=1.0) here.
return 1.0;
}
void WASAPIAudioInputStream::SetVolume(double volume) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
DCHECK_GE(volume, 0.0);
DCHECK_LE(volume, 1.0);
SendLogMessage("%s({volume=%.2f} [opened=%s])", __func__, volume,
opened_ ? "true" : "false");
if (!opened_)
return;
// Set a new master volume level. Valid volume levels are in the range
// 0.0 to 1.0. Ignore volume-change events.
HRESULT hr = simple_audio_volume_->SetMasterVolume(static_cast<float>(volume),
nullptr);
if (FAILED(hr)) {
SendLogMessage("%s => (ERROR: ISimpleAudioVolume::SetMasterVolume=[%s])",
__func__, ErrorToString(hr).c_str());
}
// Update the AGC volume level based on the last setting above. Note that,
// the volume-level resolution is not infinite and it is therefore not
// possible to assume that the volume provided as input parameter can be
// used directly. Instead, a new query to the audio hardware is required.
// This method does nothing if AGC is disabled.
UpdateAgcVolume();
}
double WASAPIAudioInputStream::GetVolume() {
DCHECK(opened_) << "Open() has not been called successfully";
if (!opened_)
return 0.0;
// Retrieve the current volume level. The value is in the range 0.0 to 1.0.
float level = 0.0f;
HRESULT hr = simple_audio_volume_->GetMasterVolume(&level);
if (FAILED(hr)) {
SendLogMessage("%s => (ERROR: ISimpleAudioVolume::GetMasterVolume=[%s])",
__func__, ErrorToString(hr).c_str());
}
return static_cast<double>(level);
}
bool WASAPIAudioInputStream::IsMuted() {
DCHECK(opened_) << "Open() has not been called successfully";
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
if (!opened_)
return false;
// Retrieves the current muting state for the audio session.
BOOL is_muted = FALSE;
HRESULT hr = simple_audio_volume_->GetMute(&is_muted);
if (FAILED(hr)) {
SendLogMessage("%s => (ERROR: ISimpleAudioVolume::GetMute=[%s])", __func__,
ErrorToString(hr).c_str());
}
return is_muted != FALSE;
}
void WASAPIAudioInputStream::SetOutputDeviceForAec(
const std::string& output_device_id) {
// Not supported. Do nothing.
}
void WASAPIAudioInputStream::SendLogMessage(const char* format, ...) {
if (log_callback_.is_null())
return;
va_list args;
va_start(args, format);
std::string msg("WAIS::" + base::StringPrintV(format, args));
log_callback_.Run(msg);
va_end(args);
}
void WASAPIAudioInputStream::Run() {
ScopedCOMInitializer com_init(ScopedCOMInitializer::kMTA);
// Enable MMCSS to ensure that this thread receives prioritized access to
// CPU resources.
DWORD task_index = 0;
HANDLE mm_task =
avrt::AvSetMmThreadCharacteristics(L"Pro Audio", &task_index);
bool mmcss_is_ok =
(mm_task && avrt::AvSetMmThreadPriority(mm_task, AVRT_PRIORITY_CRITICAL));
if (!mmcss_is_ok) {
// Failed to enable MMCSS on this thread. It is not fatal but can lead
// to reduced QoS at high load.
DWORD err = GetLastError();
LOG(ERROR) << "WAIS::" << __func__
<< " => (ERROR: Failed to enable MMCSS (error code=" << err
<< "))";
}
// Allocate a buffer with a size that enables us to take care of cases like:
// 1) The recorded buffer size is smaller, or does not match exactly with,
// the selected packet size used in each callback.
// 2) The selected buffer size is larger than the recorded buffer size in
// each event.
// In the case where no resampling is required, a single buffer should be
// enough but in case we get buffers that don't match exactly, we'll go with
// two. Same applies if we need to resample and the buffer ratio is perfect.
// However if the buffer ratio is imperfect, we will need 3 buffers to safely
// be able to buffer up data in cases where a conversion requires two audio
// buffers (and we need to be able to write to the third one).
size_t capture_buffer_size =
std::max(2 * endpoint_buffer_size_frames_ * frame_size_bytes_,
2 * packet_size_frames_ * frame_size_bytes_);
int buffers_required = capture_buffer_size / packet_size_bytes_;
if (converter_ && imperfect_buffer_size_conversion_)
++buffers_required;
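// Worked example (assuming a 100 ms endpoint buffer, 10 ms packets and no
// format conversion): |capture_buffer_size| covers 200 ms of audio, so
// |buffers_required| becomes 20.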
DCHECK(!fifo_);
fifo_ = std::make_unique<AudioBlockFifo>(
input_format_.Format.nChannels, packet_size_frames_, buffers_required);
DVLOG(1) << "AudioBlockFifo buffer count: " << buffers_required;
bool recording = true;
bool error = false;
HANDLE wait_array[2] = {stop_capture_event_.Get(),
audio_samples_ready_event_.Get()};
record_start_time_ = base::TimeTicks::Now();
last_capture_time_ = base::TimeTicks();
max_timestamp_diff_ = base::TimeDelta::Min();
min_timestamp_diff_ = base::TimeDelta::Max();
while (recording && !error) {
// Wait for a close-down event or a new capture event.
DWORD wait_result = WaitForMultipleObjects(2, wait_array, FALSE, INFINITE);
switch (wait_result) {
case WAIT_OBJECT_0 + 0:
// |stop_capture_event_| has been set.
recording = false;
break;
case WAIT_OBJECT_0 + 1:
// |audio_samples_ready_event_| has been set.
PullCaptureDataAndPushToSink();
break;
case WAIT_FAILED:
default:
error = true;
break;
}
}
if (recording && error) {
// TODO(henrika): perhaps it is worth improving the cleanup here, e.g. by
// stopping the audio client, joining the thread, etc.
NOTREACHED() << "WASAPI capturing failed with error code "
<< GetLastError();
}
// Disable MMCSS.
if (mm_task && !avrt::AvRevertMmThreadCharacteristics(mm_task)) {
PLOG(WARNING) << "Failed to disable MMCSS";
}
fifo_.reset();
}
void WASAPIAudioInputStream::PullCaptureDataAndPushToSink() {
TRACE_EVENT1("audio", "WASAPIAudioInputStream::PullCaptureDataAndPushToSink",
"sample rate", input_format_.Format.nSamplesPerSec);
UINT64 last_device_position = 0;
UINT32 num_frames_in_next_packet = 0;
// Get the number of frames in the next data packet in the capture endpoint
// buffer. The count reported by GetNextPacketSize matches the count retrieved
// in the GetBuffer call that follows this call.
HRESULT hr =
audio_capture_client_->GetNextPacketSize(&num_frames_in_next_packet);
if (FAILED(hr)) {
LOG(ERROR) << "WAIS::" << __func__
<< " => (ERROR: 1-IAudioCaptureClient::GetNextPacketSize=["
<< ErrorToString(hr).c_str() << "])";
return;
}
// Pull data from the capture endpoint buffer until it's empty or an error
// occurs. Drains the WASAPI capture buffer fully.
while (num_frames_in_next_packet > 0) {
BYTE* data_ptr = nullptr;
UINT32 num_frames_to_read = 0;
DWORD flags = 0;
UINT64 device_position = 0;
UINT64 capture_time_100ns = 0;
// Retrieve the amount of data in the capture endpoint buffer, replace it
// with silence if required, create callbacks for each packet and store
// non-delivered data for the next event.
hr =
audio_capture_client_->GetBuffer(&data_ptr, &num_frames_to_read, &flags,
&device_position, &capture_time_100ns);
if (hr == AUDCLNT_S_BUFFER_EMPTY) {
DCHECK_EQ(num_frames_to_read, 0u);
return;
}
if (hr == AUDCLNT_E_OUT_OF_ORDER) {
// A previous IAudioCaptureClient::GetBuffer() call is still in effect.
// Release any acquired buffer to be able to try reading a buffer again.
audio_capture_client_->ReleaseBuffer(num_frames_to_read);
}
if (FAILED(hr)) {
LOG(ERROR) << "WAIS::" << __func__
<< " => (ERROR: IAudioCaptureClient::GetBuffer=["
<< ErrorToString(hr).c_str() << "])";
return;
}
// The data in the packet is not correlated with the previous packet's
// device position; this is possibly due to a stream state transition or
// timing glitch. Note that, usage of this flag was added after the existing
// glitch detection and it will be used as a supplementary scheme initially.
// The behavior of the AUDCLNT_BUFFERFLAGS_DATA_DISCONTINUITY flag is
// undefined on the application's first call to GetBuffer after Start and
// Windows 7 or later is required for support.
// TODO(https://crbug.com/1427096): take this into account when reporting
// glitch info.
const bool observed_data_discontinuity =
(device_position > 0 && flags & AUDCLNT_BUFFERFLAGS_DATA_DISCONTINUITY);
if (observed_data_discontinuity) {
LOG(WARNING) << "WAIS::" << __func__
<< " => (WARNING: AUDCLNT_BUFFERFLAGS_DATA_DISCONTINUITY)";
}
data_discontinuity_reporter_->Log(observed_data_discontinuity);
// The time at which the device's stream position was recorded is uncertain.
// Thus, the client might be unable to accurately set a time stamp for the
// current data packet.
bool timestamp_error_was_detected = false;
if (flags & AUDCLNT_BUFFERFLAGS_TIMESTAMP_ERROR) {
// TODO(https://crbug.com/825744): it might be possible to improve error
// handling here and avoid using the counter in |capture_time_100ns|.
LOG(WARNING) << "WAIS::" << __func__
<< " => (WARNING: AUDCLNT_BUFFERFLAGS_TIMESTAMP_ERROR)";
if (num_timestamp_errors_ == 0) {
// Measure the time it took until the first timestamp error was found.
time_until_first_timestamp_error_ =
base::TimeTicks::Now() - record_start_time_;
}
++num_timestamp_errors_;
timestamp_error_was_detected = true;
}
// If the device position has changed, we assume this data belongs to a new
// chunk, so we report delay and glitch stats and update the last and next
// expected device positions.
// If the device position has not changed we assume this data belongs to the
// previous chunk, and only update the expected next device position.
if (device_position != last_device_position) {
if (expected_next_device_position_ != 0) {
base::TimeDelta glitch_duration;
if (device_position > expected_next_device_position_) {
glitch_duration = AudioTimestampHelper::FramesToTime(
device_position - expected_next_device_position_,
input_format_.Format.nSamplesPerSec);
}
glitch_reporter_.UpdateStats(glitch_duration);
if (glitch_duration.is_positive()) {
glitch_accumulator_.Add({.duration = glitch_duration, .count = 1});
}
}
last_device_position = device_position;
expected_next_device_position_ = device_position + num_frames_to_read;
} else {
expected_next_device_position_ += num_frames_to_read;
}
base::TimeTicks capture_time;
if (use_fake_audio_capture_timestamps_) {
capture_time = base::TimeTicks::Now();
} else if (!timestamp_error_was_detected) {
// Use the latest |capture_time_100ns| since it is marked as valid.
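// |capture_time_100ns| is a QPC-based value in 100-nanosecond units, so
// dividing by 10.0 converts it to microseconds before adding it to the
// default-constructed (null) base::TimeTicks.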
capture_time += base::Microseconds(capture_time_100ns / 10.0);
}
if (capture_time <= last_capture_time_) {
// Latest |capture_time_100ns| can't be trusted. Ensure a monotonic time-
// stamp sequence by adding one microsecond to the latest timestamp.
capture_time = last_capture_time_ + base::Microseconds(1);
}
// Keep track of max and min time difference between two successive time-
// stamps. Results are used in Stop() to verify that the time-stamp sequence
// was monotonic.
if (!last_capture_time_.is_null()) {
const auto delta_ts = capture_time - last_capture_time_;
DCHECK_GT(device_position, 0u);
DCHECK_GT(delta_ts, base::TimeDelta::Min());
if (delta_ts > max_timestamp_diff_) {
max_timestamp_diff_ = delta_ts;
} else if (delta_ts < min_timestamp_diff_) {
min_timestamp_diff_ = delta_ts;
}
}
// Store the capture timestamp. Might be used as reference next time if
// a new valid timestamp can't be retrieved to always guarantee a monotonic
// sequence.
last_capture_time_ = capture_time;
// Adjust |capture_time| for the FIFO before pushing.
capture_time -= AudioTimestampHelper::FramesToTime(
fifo_->GetAvailableFrames(), input_format_.Format.nSamplesPerSec);
if (flags & AUDCLNT_BUFFERFLAGS_SILENT) {
fifo_->PushSilence(num_frames_to_read);
} else {
const int bytes_per_sample = input_format_.Format.wBitsPerSample / 8;
peak_detector_.FindPeak(data_ptr, num_frames_to_read, bytes_per_sample);
fifo_->Push(data_ptr, num_frames_to_read, bytes_per_sample);
}
hr = audio_capture_client_->ReleaseBuffer(num_frames_to_read);
if (FAILED(hr)) {
LOG(ERROR) << "WAIS::" << __func__
<< " => (ERROR: IAudioCaptureClient::ReleaseBuffer=["
<< ErrorToString(hr).c_str() << "])";
return;
}
// Get a cached AGC volume level which is updated once every second on the
// audio manager thread. Note that, |volume| is also updated each time
// SetVolume() is called through IPC by the render-side AGC.
double volume = 0.0;
GetAgcVolume(&volume);
// Deliver captured data to the registered consumer using a packet size
// which was specified at construction.
while (fifo_->available_blocks()) {
if (converter_) {
if (imperfect_buffer_size_conversion_ &&
fifo_->available_blocks() == 1) {
// Special case. We need to buffer up more audio before we can convert
// or else we'll suffer an underrun.
// TODO(grunell): Verify this is really true.
return;
}
converter_->Convert(convert_bus_.get());
sink_->OnData(convert_bus_.get(), capture_time, volume,
glitch_accumulator_.GetAndReset());
// Move the capture time forward for each vended block.
capture_time += AudioTimestampHelper::FramesToTime(
convert_bus_->frames(), output_format_.nSamplesPerSec);
} else {
sink_->OnData(fifo_->Consume(), capture_time, volume,
glitch_accumulator_.GetAndReset());
// Move the capture time forward for each vended block.
capture_time += AudioTimestampHelper::FramesToTime(
packet_size_frames_, input_format_.Format.nSamplesPerSec);
}
}
// Get the number of frames in the next data packet in the capture endpoint
// buffer. Keep reading if more samples exist.
hr = audio_capture_client_->GetNextPacketSize(&num_frames_in_next_packet);
if (FAILED(hr)) {
LOG(ERROR) << "WAIS::" << __func__
<< " => (ERROR: 2-IAudioCaptureClient::GetNextPacketSize=["
<< ErrorToString(hr).c_str() << "])";
return;
}
} // while (num_frames_in_next_packet > 0)
}
void WASAPIAudioInputStream::HandleError(HRESULT err) {
NOTREACHED() << "Error code: " << err;
if (sink_)
sink_->OnError();
}
HRESULT WASAPIAudioInputStream::SetCaptureDevice() {
DCHECK_EQ(OPEN_RESULT_OK, open_result_);
DCHECK(!endpoint_device_.Get());
SendLogMessage("%s()", __func__);
Microsoft::WRL::ComPtr<IMMDeviceEnumerator> enumerator;
HRESULT hr = ::CoCreateInstance(__uuidof(MMDeviceEnumerator), nullptr,
CLSCTX_ALL, IID_PPV_ARGS(&enumerator));
if (FAILED(hr)) {
open_result_ = OPEN_RESULT_CREATE_INSTANCE;
return hr;
}
// Retrieve the IMMDevice by using the specified role or the specified
// unique endpoint device-identification string.
// To open a stream in loopback mode, the client must obtain an IMMDevice
// interface for the rendering endpoint device. Make that happen if needed;
// otherwise use default capture data-flow direction.
const EDataFlow data_flow =
AudioDeviceDescription::IsLoopbackDevice(device_id_) ? eRender : eCapture;
// Determine selected role to be used if the device is a default device.
const ERole role = AudioDeviceDescription::IsCommunicationsDevice(device_id_)
? eCommunications
: eConsole;
if (AudioDeviceDescription::IsDefaultDevice(device_id_) ||
AudioDeviceDescription::IsCommunicationsDevice(device_id_) ||
AudioDeviceDescription::IsLoopbackDevice(device_id_)) {
hr =
enumerator->GetDefaultAudioEndpoint(data_flow, role, &endpoint_device_);
} else {
hr = enumerator->GetDevice(base::UTF8ToWide(device_id_).c_str(),
&endpoint_device_);
}
if (FAILED(hr)) {
open_result_ = OPEN_RESULT_NO_ENDPOINT;
return hr;
}
// Get the volume interface for the endpoint. Used in `Stop()` to query the
// volume range of the selected input device or to get/set mute state in
// `Start()` and `Stop()` if a loopback device with muted system audio is
// requested.
hr = endpoint_device_->Activate(__uuidof(IAudioEndpointVolume), CLSCTX_ALL,
nullptr, &system_audio_volume_);
if (FAILED(hr)) {
open_result_ = OPEN_RESULT_ACTIVATION_FAILED;
return hr;
}
// Verify that the audio endpoint device is active, i.e., the audio
// adapter that connects to the endpoint device is present and enabled.
DWORD state = DEVICE_STATE_DISABLED;
hr = endpoint_device_->GetState(&state);
if (FAILED(hr)) {
open_result_ = OPEN_RESULT_NO_STATE;
return hr;
}
if (!(state & DEVICE_STATE_ACTIVE)) {
DLOG(ERROR) << "Selected capture device is not active.";
open_result_ = OPEN_RESULT_DEVICE_NOT_ACTIVE;
hr = E_ACCESSDENIED;
}
return hr;
}
bool WASAPIAudioInputStream::RawProcessingSupported() {
DCHECK(endpoint_device_.Get());
// Check if System.Devices.AudioDevice.RawProcessingSupported can be found
// and queried in the Windows Property System. It corresponds to raw
// processing mode support for the specified audio device. If its value is
// VARIANT_TRUE the device supports raw processing mode.
bool raw_processing_supported = false;
Microsoft::WRL::ComPtr<IPropertyStore> properties;
base::win::ScopedPropVariant raw_processing;
if (FAILED(endpoint_device_->OpenPropertyStore(STGM_READ, &properties)) ||
FAILED(
properties->GetValue(PKEY_Devices_AudioDevice_RawProcessingSupported,
raw_processing.Receive())) ||
raw_processing.get().vt != VT_BOOL) {
SendLogMessage(
"%s => (WARNING: failed to access "
"System.Devices.AudioDevice.RawProcessingSupported)",
__func__);
} else {
raw_processing_supported = VariantBoolToBool(raw_processing.get().boolVal);
SendLogMessage(
"%s => (System.Devices.AudioDevice.RawProcessingSupported=%s)",
__func__, raw_processing_supported ? "true" : "false");
}
return raw_processing_supported;
}
std::string WASAPIAudioInputStream::GetUWPDeviceId() {
DCHECK(endpoint_device_.Get());
// The Windows.Media.Devices.IMediaDeviceStatics interface provides access to
// the implementation of Windows.Media.Devices.MediaDevice.
ComPtr<IMediaDeviceStatics> media_device_statics;
HRESULT hr =
GetActivationFactory<IMediaDeviceStatics,
RuntimeClass_Windows_Media_Devices_MediaDevice>(
&media_device_statics);
if (FAILED(hr)) {
DLOG(ERROR) << "IMediaDeviceStatics factory failed: " << ErrorToString(hr);
return std::string();
}
// The remaining part of this method builds up the unique device ID needed
// by the Windows.Media.Effects.AudioEffectsManager UWP API to enumerate
// active capture effects like AEC and NS. The ID contains three parts.
// Example:
// 1) \\?\SWD#MMDEVAPI#
// 2) {0.0.1.00000000}.{7c24467c-94fc-4fa1-a2b2-a3f5d9cb8a5b}
// 3) #{2eef81be-33fa-4800-9670-1cd474972c3f}
// Where (1) is a constant string, (2) comes from the IMMDevice::GetId() API,
// and (3) is a substring of the selector string which can be retrieved by
// the IMediaDeviceStatics::GetAudioCaptureSelector UWP API. Knowledge about
// the structure of this device ID can be gained by using the
// IMediaDeviceStatics::GetDefaultAudioCaptureId UWP API but this method also
// adds support for non default devices.
// (1) Start building the final device ID. Start with the constant prefix.
std::string device_id(kUwpDeviceIdPrefix);
// (2) Next, add the unique ID from IMMDevice::GetId() API.
// Example: {0.0.1.00000000}.{7c24467c-94fc-4fa1-a2b2-a3f5d9cb8a5b}.
ScopedCoMem<WCHAR> immdevice_id16;
hr = endpoint_device_->GetId(&immdevice_id16);
if (FAILED(hr)) {
DLOG(ERROR) << "IMMDevice::GetId failed: " << ErrorToString(hr);
return std::string();
}
std::string immdevice_id8;
base::WideToUTF8(immdevice_id16, wcslen(immdevice_id16), &immdevice_id8);
device_id.append(immdevice_id8);
// (3) Finally, add the last part from the selector string.
// Example: '#{2eef81be-33fa-4800-9670-1cd474972c3f}'.
HSTRING selector;
// Returns the identifier string of a device for capturing audio. A substring
// will be used when generating the final unique device ID.
// Example: part of the selector string can look like
// System.Devices.InterfaceClassGuid:="{2eef81be-33fa-4800-9670-1cd474972c3f}"
// and we want the {2eef81be-33fa-4800-9670-1cd474972c3f} substring for our
// purposes.
hr = media_device_statics->GetAudioCaptureSelector(&selector);
if (FAILED(hr)) {
DLOG(ERROR) << "IMediaDeviceStatics::GetAudioCaptureSelector failed: "
<< ErrorToString(hr);
return std::string();
}
device_id.append("#");
std::string selector_string = ScopedHString(selector).GetAsUTF8();
std::size_t start = selector_string.find("{");
std::size_t stop = selector_string.find("}", start + 1);
if (start != std::string::npos && stop != std::string::npos) {
// Will extract '{2eef81be-33fa-4800-9670-1cd474972c3f}' in the example
// above.
device_id.append(selector_string.substr(start, stop - start + 1));
} else {
DLOG(ERROR) << "Failed to extract System.Devices.InterfaceClassGuid string";
return std::string();
}
return device_id;
}
HRESULT WASAPIAudioInputStream::GetAudioCaptureEffects(
const std::string& uwp_device_id) {
DCHECK(!AudioDeviceDescription::IsLoopbackDevice(device_id_));
DCHECK(raw_processing_supported_);
DCHECK(!uwp_device_id.empty());
SendLogMessage("%s()", __func__);
// The Windows.Media.Effects.IAudioEffectsManagerStatics interface provides
// access to the implementation of Windows.Media.Effects.AudioEffectsManager.
ComPtr<IAudioEffectsManagerStatics> audio_effects_manager;
HRESULT hr = GetActivationFactory<
IAudioEffectsManagerStatics,
RuntimeClass_Windows_Media_Effects_AudioEffectsManager>(
&audio_effects_manager);
if (FAILED(hr)) {
SendLogMessage(
"%s => (ERROR: IAudioEffectsManagerStatics factory failed: [%s])",
__func__, ErrorToString(hr).c_str());
return hr;
}
SendLogMessage("%s => (uwp_device_id=[%s])", __func__, uwp_device_id.c_str());
ScopedHString device_id = ScopedHString::Create(uwp_device_id);
// Check capture effects for two different audio processing modes:
// - Default: Normal audio signal processing
// - Raw: Minimal audio signal processing
// Raw is included since it is not possible to disable all effects on all
// devices. In most cases, the number of found capture effects will be zero
// for the raw mode.
ABI::Windows::Media::AudioProcessing audio_processing_mode[] = {
ABI::Windows::Media::AudioProcessing::AudioProcessing_Default,
ABI::Windows::Media::AudioProcessing::AudioProcessing_Raw};
for (size_t i = 0; i < std::size(audio_processing_mode); ++i) {
// Create an AudioCaptureEffectsManager manager which can be used to
// discover the audio processing chain on a device for a specific media
// category and audio processing mode. The media category is fixed and set
// to Communications since that is what we aim at using when audio effects
// later are disabled.
ComPtr<IAudioCaptureEffectsManager> capture_effects_manager;
hr = audio_effects_manager->CreateAudioCaptureEffectsManagerWithMode(
device_id.get(),
ABI::Windows::Media::Capture::MediaCategory::
MediaCategory_Communications,
audio_processing_mode[i], &capture_effects_manager);
if (FAILED(hr)) {
SendLogMessage(
"%s => (ERROR: IAudioEffectsManagerStatics::"
"CreateAudioCaptureEffectsManager=[%s])",
__func__, ErrorToString(hr).c_str());
return hr;
}
// Get a list of audio effects on the device. Based on tests on different
// devices, only enabled effects will be included. Hence, if a user has
// explicitly disabled an effect using the System Sound Settings, that
// component will not show up here.
ComPtr<IVectorView<ABI::Windows::Media::Effects::AudioEffect*>> effects;
hr = capture_effects_manager->GetAudioCaptureEffects(&effects);
if (FAILED(hr)) {
SendLogMessage(
"%s => (ERROR: IAudioCaptureEffectsManager::"
"GetAudioCaptureEffects=[%s])",
__func__, ErrorToString(hr).c_str());
return hr;
}
unsigned int count = 0;
if (effects) {
// Returns number of supported effects.
effects->get_Size(&count);
}
// Store all supported and active effect types in |default_effect_types_|
// or |raw_effect_types_| depending on selected audio processing mode.
// These will be utilized later for UMA histograms.
for (unsigned int j = 0; j < count; ++j) {
ComPtr<ABI::Windows::Media::Effects::IAudioEffect> effect;
hr = effects->GetAt(j, &effect);
if (SUCCEEDED(hr)) {
ABI::Windows::Media::Effects::AudioEffectType type;
hr = effect->get_AudioEffectType(&type);
if (SUCCEEDED(hr)) {
audio_processing_mode[i] ==
ABI::Windows::Media::AudioProcessing::AudioProcessing_Default
? default_effect_types_.push_back(type)
: raw_effect_types_.push_back(type);
}
}
}
// For cases when no audio effects were found (common in raw mode), add a
// dummy effect type called AudioEffectType_Other so that the vector
// contains at least one value. This is done to ensure that a UMA histogram
// is uploaded even for the empty case. Hence, AudioEffectType_Other is
// used to indicate an unknown audio effect and "no audio effect found".
if (count == 0) {
const ABI::Windows::Media::Effects::AudioEffectType no_effect_found =
ABI::Windows::Media::Effects::AudioEffectType::AudioEffectType_Other;
audio_processing_mode[i] ==
ABI::Windows::Media::AudioProcessing::AudioProcessing_Default
? default_effect_types_.push_back(no_effect_found)
: raw_effect_types_.push_back(no_effect_found);
}
}
return hr;
}
HRESULT WASAPIAudioInputStream::GetAudioEngineNumChannels(WORD* channels) {
DCHECK(audio_client_.Get());
SendLogMessage("%s()", __func__);
WAVEFORMATEXTENSIBLE mix_format;
// Retrieve the stream format that the audio engine uses for its internal
// processing of shared-mode streams.
HRESULT hr =
CoreAudioUtil::GetSharedModeMixFormat(audio_client_.Get(), &mix_format);
if (SUCCEEDED(hr)) {
// Return the native number of supported audio channels.
CoreAudioUtil::WaveFormatWrapper wformat(&mix_format);
*channels = wformat->nChannels;
SendLogMessage("%s => (native channels=[%d])", __func__, *channels);
}
return hr;
}
HRESULT
WASAPIAudioInputStream::SetCommunicationsCategoryAndMaybeRawCaptureMode(
WORD channels) {
DCHECK(audio_client_.Get());
DCHECK(!AudioDeviceDescription::IsLoopbackDevice(device_id_));
DCHECK(raw_processing_supported_);
SendLogMessage("%s({channels=%d})", __func__, channels);
Microsoft::WRL::ComPtr<IAudioClient2> audio_client2;
HRESULT hr = audio_client_.As(&audio_client2);
if (FAILED(hr)) {
SendLogMessage("%s => (ERROR: IAudioClient2 is not supported)", __func__);
return hr;
}
// Use IAudioClient2::SetClientProperties() to set communications category
// and to enable raw stream capture if it is supported.
if (audio_client2.Get()) {
AudioClientProperties audio_props = {0};
audio_props.cbSize = sizeof(AudioClientProperties);
audio_props.bIsOffload = false;
// AudioCategory_Communications opts us in to communications policy and
// communications processing. AUDCLNT_STREAMOPTIONS_RAW turns off the
// processing, but not the policy.
audio_props.eCategory = AudioCategory_Communications;
// The audio stream is a 'raw' stream that bypasses all signal processing
// except for endpoint specific, always-on processing in the Audio
// Processing Object (APO), driver, and hardware.
// See https://crbug.com/1257662 for details on why we avoid using raw
// capture mode on devices with more than eight input channels.
if (channels > 0 && channels <= media::kMaxConcurrentChannels) {
audio_props.Options = AUDCLNT_STREAMOPTIONS_RAW;
}
hr = audio_client2->SetClientProperties(&audio_props);
if (FAILED(hr)) {
SendLogMessage("%s => (ERROR: IAudioClient2::SetClientProperties=[%s])",
__func__, ErrorToString(hr).c_str());
}
}
return hr;
}
bool WASAPIAudioInputStream::DesiredFormatIsSupported(HRESULT* hr) {
SendLogMessage("%s()", __func__);
// An application that uses WASAPI to manage shared-mode streams can rely
// on the audio engine to perform only limited format conversions. The audio
// engine can convert between a standard PCM sample size used by the
// application and the floating-point samples that the engine uses for its
// internal processing. However, the format for an application stream
// typically must have the same number of channels and the same sample
// rate as the stream format used by the device.
// Many audio devices support both PCM and non-PCM stream formats. However,
// the audio engine can mix only PCM streams.
base::win::ScopedCoMem<WAVEFORMATEX> closest_match;
HRESULT hresult = audio_client_->IsFormatSupported(
AUDCLNT_SHAREMODE_SHARED,
reinterpret_cast<const WAVEFORMATEX*>(&input_format_), &closest_match);
if (FAILED(hresult)) {
SendLogMessage("%s => (ERROR: IAudioClient::IsFormatSupported=[%s])",
__func__, ErrorToString(hresult).c_str());
}
if (hresult == S_FALSE) {
SendLogMessage(
"%s => (WARNING: Format is not supported but a closest match exists)",
__func__);
// Change the format we're going to ask for to better match what the OS
// can provide. If we succeed in initializing the audio client in this
// format and are able to convert from this format, we will do that
// conversion.
WAVEFORMATEX* input_format = &input_format_.Format;
input_format->nChannels = closest_match->nChannels;
input_format->nSamplesPerSec = closest_match->nSamplesPerSec;
// If the closest match is fixed point PCM (WAVE_FORMAT_PCM or
// KSDATAFORMAT_SUBTYPE_PCM), we use the closest match's bits per sample.
// Otherwise, we keep the bits per sample as is since we still request fixed
// point PCM. In that case the closest match is typically in float format
// (KSDATAFORMAT_SUBTYPE_IEEE_FLOAT).
if (CoreAudioUtil::WaveFormatWrapper(closest_match.get()).IsPcm()) {
input_format->wBitsPerSample = closest_match->wBitsPerSample;
}
input_format->nBlockAlign =
(input_format->wBitsPerSample / 8) * input_format->nChannels;
input_format->nAvgBytesPerSec =
input_format->nSamplesPerSec * input_format->nBlockAlign;
if (IsSupportedFormatForConversion(&input_format_)) {
SendLogMessage(
"%s => (WARNING: Captured audio will be converted: [%s] ==> [%s])",
__func__, CoreAudioUtil::WaveFormatToString(&input_format_).c_str(),
CoreAudioUtil::WaveFormatToString(&output_format_).c_str());
SetupConverterAndStoreFormatInfo();
// Indicate that we're good to go with a close match.
hresult = S_OK;
}
}
// At this point, |hresult| == S_OK if the desired format is supported. If
// |hresult| == S_FALSE, the OS supports a closest match but we don't support
// conversion to it. Thus, SUCCEEDED() or FAILED() can't be used to determine
// if the desired format is supported.
*hr = hresult;
return (hresult == S_OK);
}
void WASAPIAudioInputStream::SetupConverterAndStoreFormatInfo() {
// Ideally, we want a 1:1 ratio between the buffers we get and the buffers
// we give to OnData so that each buffer we receive from the OS can be
// directly converted to a buffer that matches with what was asked for.
const double buffer_ratio =
output_format_.nSamplesPerSec / static_cast<double>(packet_size_frames_);
double new_frames_per_buffer =
input_format_.Format.nSamplesPerSec / buffer_ratio;
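// Worked example (assuming the client asked for 48000 Hz with 480-frame
// packets and the closest match is 44100 Hz): |buffer_ratio| is
// 48000 / 480 = 100 buffers per second and |new_frames_per_buffer| becomes
// 44100 / 100 = 441, an integral value, so |imperfect_buffer_size_conversion_|
// (set below) stays false.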
const auto input_layout =
ChannelLayoutConfig::Guess(input_format_.Format.nChannels);
DCHECK_NE(CHANNEL_LAYOUT_UNSUPPORTED, input_layout.channel_layout());
const auto output_layout =
ChannelLayoutConfig::Guess(output_format_.nChannels);
DCHECK_NE(CHANNEL_LAYOUT_UNSUPPORTED, output_layout.channel_layout());
const AudioParameters input(AudioParameters::AUDIO_PCM_LOW_LATENCY,
input_layout, input_format_.Format.nSamplesPerSec,
static_cast<int>(new_frames_per_buffer));
const AudioParameters output(AudioParameters::AUDIO_PCM_LOW_LATENCY,
output_layout, output_format_.nSamplesPerSec,
packet_size_frames_);
converter_ = std::make_unique<AudioConverter>(input, output, false);
converter_->AddInput(this);
converter_->PrimeWithSilence();
convert_bus_ = AudioBus::Create(output);
// Update our packet size assumptions based on the new format.
const auto new_bytes_per_buffer = static_cast<int>(new_frames_per_buffer) *
input_format_.Format.nBlockAlign;
packet_size_frames_ = new_bytes_per_buffer / input_format_.Format.nBlockAlign;
packet_size_bytes_ = new_bytes_per_buffer;
frame_size_bytes_ = input_format_.Format.nBlockAlign;
imperfect_buffer_size_conversion_ =
std::modf(new_frames_per_buffer, &new_frames_per_buffer) != 0.0;
if (imperfect_buffer_size_conversion_) {
SendLogMessage("%s => (WARNING: Audio capture conversion requires a FIFO)",
__func__);
}
}
HRESULT WASAPIAudioInputStream::InitializeAudioEngine() {
DCHECK_EQ(OPEN_RESULT_OK, open_result_);
SendLogMessage("%s()", __func__);
DWORD flags;
// Use event-driven mode only for regular input devices. For loopback the
// EVENTCALLBACK flag is specified when initializing
// |audio_render_client_for_loopback_|.
if (AudioDeviceDescription::IsLoopbackDevice(device_id_)) {
flags = AUDCLNT_STREAMFLAGS_LOOPBACK | AUDCLNT_STREAMFLAGS_NOPERSIST;
} else {
flags = AUDCLNT_STREAMFLAGS_EVENTCALLBACK | AUDCLNT_STREAMFLAGS_NOPERSIST;
}
// Initialize the audio stream between the client and the device.
// We connect indirectly through the audio engine by using shared mode.
// The buffer duration is set to 100 ms, which reduces the risk of glitches.
// It would normally be set to 0, in which case the minimum buffer size needed
// to avoid glitches (typically around 22 ms) would be used. There are however
// cases where glitches occur anyway, and they are avoided by setting a larger
// buffer size. The larger size does not create higher latency for properly
// implemented drivers.
HRESULT hr = audio_client_->Initialize(
AUDCLNT_SHAREMODE_SHARED, flags,
100 * 1000 * 10, // Buffer duration, 100 ms expressed in 100-ns units.
0, // Device period, n/a for shared mode.
reinterpret_cast<const WAVEFORMATEX*>(&input_format_),
AudioDeviceDescription::IsCommunicationsDevice(device_id_)
? &kCommunicationsSessionId
: nullptr);
if (FAILED(hr)) {
SendLogMessage("%s => (ERROR: IAudioClient::Initialize=[%s])", __func__,
ErrorToString(hr).c_str());
open_result_ = OPEN_RESULT_AUDIO_CLIENT_INIT_FAILED;
base::UmaHistogramSparse("Media.Audio.Capture.Win.InitError", hr);
MaybeReportFormatRelatedInitError(hr);
return hr;
}
// Retrieve the length of the endpoint buffer shared between the client
// and the audio engine. The buffer length determines the maximum amount
// of capture data that the audio engine can read from the endpoint buffer
// during a single processing pass.
hr = audio_client_->GetBufferSize(&endpoint_buffer_size_frames_);
if (FAILED(hr)) {
open_result_ = OPEN_RESULT_GET_BUFFER_SIZE_FAILED;
return hr;
}
const int endpoint_buffer_size_ms =
static_cast<double>(endpoint_buffer_size_frames_ * 1000) /
input_format_.Format.nSamplesPerSec +
0.5;
SendLogMessage("%s => (endpoint_buffer_size_frames=%u (%d ms))", __func__,
endpoint_buffer_size_frames_, endpoint_buffer_size_ms);
#ifndef NDEBUG
// The period between processing passes by the audio engine is fixed for a
// particular audio endpoint device and represents the smallest processing
// quantum for the audio engine. This period plus the stream latency between
// the buffer and endpoint device represents the minimum possible latency
// that an audio application can achieve.
REFERENCE_TIME device_period_shared_mode = 0;
REFERENCE_TIME device_period_exclusive_mode = 0;
HRESULT hr_dbg = audio_client_->GetDevicePeriod(
&device_period_shared_mode, &device_period_exclusive_mode);
if (SUCCEEDED(hr_dbg)) {
// The 5000 addition is to round end result to closest integer.
const int device_period_ms = (device_period_shared_mode + 5000) / 10000;
DVLOG(1) << "Device period: " << device_period_ms << " ms";
}
REFERENCE_TIME latency = 0;
hr_dbg = audio_client_->GetStreamLatency(&latency);
if (SUCCEEDED(hr_dbg)) {
// The 5000 addition is to round end result to closest integer.
const int latency_ms = (latency + 5000) / 10000;
DVLOG(1) << "Stream latency: " << latency_ms << " ms";
}
#endif
// Set the event handle that the audio engine will signal each time a buffer
// becomes ready to be processed by the client.
//
// In loopback case the capture device doesn't receive any events, so we
// need to create a separate playback client to get notifications. According
// to MSDN:
//
// A pull-mode capture client does not receive any events when a stream is
// initialized with event-driven buffering and is loopback-enabled. To
// work around this, initialize a render stream in event-driven mode. Each
// time the client receives an event for the render stream, it must signal
// the capture client to run the capture thread that reads the next set of
// samples from the capture endpoint buffer.
//
// http://msdn.microsoft.com/en-us/library/windows/desktop/dd316551(v=vs.85).aspx
if (AudioDeviceDescription::IsLoopbackDevice(device_id_)) {
SendLogMessage("%s => (WARNING: loopback mode is selected)", __func__);
hr = endpoint_device_->Activate(__uuidof(IAudioClient), CLSCTX_ALL, nullptr,
&audio_render_client_for_loopback_);
if (FAILED(hr)) {
open_result_ = OPEN_RESULT_LOOPBACK_ACTIVATE_FAILED;
return hr;
}
hr = audio_render_client_for_loopback_->Initialize(
AUDCLNT_SHAREMODE_SHARED,
AUDCLNT_STREAMFLAGS_EVENTCALLBACK | AUDCLNT_STREAMFLAGS_NOPERSIST, 0, 0,
reinterpret_cast<const WAVEFORMATEX*>(&input_format_),
AudioDeviceDescription::IsCommunicationsDevice(device_id_)
? &kCommunicationsSessionId
: nullptr);
if (FAILED(hr)) {
open_result_ = OPEN_RESULT_LOOPBACK_INIT_FAILED;
return hr;
}
hr = audio_render_client_for_loopback_->SetEventHandle(
audio_samples_ready_event_.Get());
} else {
hr = audio_client_->SetEventHandle(audio_samples_ready_event_.Get());
}
if (FAILED(hr)) {
open_result_ = OPEN_RESULT_SET_EVENT_HANDLE;
return hr;
}
// Get access to the IAudioCaptureClient interface. This interface
// enables us to read input data from the capture endpoint buffer.
hr = audio_client_->GetService(IID_PPV_ARGS(&audio_capture_client_));
if (FAILED(hr)) {
open_result_ = OPEN_RESULT_NO_CAPTURE_CLIENT;
return hr;
}
// Obtain a reference to the ISimpleAudioVolume interface which enables
// us to control the master volume level of an audio session.
hr = audio_client_->GetService(IID_PPV_ARGS(&simple_audio_volume_));
if (FAILED(hr))
open_result_ = OPEN_RESULT_NO_AUDIO_VOLUME;
return hr;
}
void WASAPIAudioInputStream::ReportOpenResult(HRESULT hr) {
DCHECK(!opened_);
UMA_HISTOGRAM_ENUMERATION("Media.Audio.Capture.Win.Open", open_result_,
OPEN_RESULT_MAX + 1);
if (open_result_ != OPEN_RESULT_OK &&
open_result_ != OPEN_RESULT_OK_WITH_RESAMPLING) {
SendLogMessage(
"%s", GetOpenLogString(open_result_, hr, input_format_, output_format_)
.c_str());
}
}
void WASAPIAudioInputStream::MaybeReportFormatRelatedInitError(
HRESULT hr) const {
if (hr != AUDCLNT_E_UNSUPPORTED_FORMAT && hr != E_INVALIDARG)
return;
const FormatRelatedInitError format_related_error =
hr == AUDCLNT_E_UNSUPPORTED_FORMAT
? converter_.get()
? FormatRelatedInitError::kUnsupportedFormatWithFormatConversion
: FormatRelatedInitError::kUnsupportedFormat
// Otherwise |hr| == E_INVALIDARG.
: converter_.get()
? FormatRelatedInitError::kInvalidArgumentWithFormatConversion
: FormatRelatedInitError::kInvalidArgument;
base::UmaHistogramEnumeration(
"Media.Audio.Capture.Win.InitError.FormatRelated", format_related_error,
FormatRelatedInitError::kCount);
}
double WASAPIAudioInputStream::ProvideInput(
AudioBus* audio_bus,
uint32_t frames_delayed,
const AudioGlitchInfo& glitch_info) {
fifo_->Consume()->CopyTo(audio_bus);
return 1.0;
}
void WASAPIAudioInputStream::ReportAndResetGlitchStats() {
glitch_accumulator_.GetAndReset();
SystemGlitchReporter::Stats stats =
glitch_reporter_.GetLongTermStatsAndReset();
SendLogMessage(
"%s => (num_glitches_detected=[%d], cumulative_audio_lost=[%llu ms], "
"largest_glitch=[%llu ms])",
__func__, stats.glitches_detected,
stats.total_glitch_duration.InMilliseconds(),
stats.largest_glitch_duration.InMilliseconds());
int num_data_discontinuities =
data_discontinuity_reporter_->GetLongTermDiscontinuityCountAndReset();
SendLogMessage("%s => (discontinuity warnings=[%d])", __func__,
num_data_discontinuities);
SendLogMessage("%s => (timstamp errors=[%" PRIu64 "])", __func__,
num_timestamp_errors_);
if (num_timestamp_errors_ > 0) {
SendLogMessage("%s => (time until first timestamp error=[%" PRId64 " ms])",
__func__,
time_until_first_timestamp_error_.InMilliseconds());
}
expected_next_device_position_ = 0;
num_timestamp_errors_ = 0;
}
} // namespace media