src/content/browser/speech/endpointer/energy_endpointer.cc - cobalt - Git at Google

 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 //
 // To learn more about the algorithm used and the original code on which this
 // is based, see
 // https://wiki.corp.google.com/twiki/bin/view/Main/ChromeGoogleCodeXRef

 #include "content/browser/speech/endpointer/energy_endpointer.h"

 #include <math.h>
 #include <stddef.h>

 #include "base/logging.h"

 namespace {

 // Returns the mean-removed RMS of the input signal: sqrt(mean(x^2) - mean(x)^2),
 // i.e. the standard deviation of the samples, so a constant (DC) signal
 // yields 0.
 //
 // |samples| must point to at least |num_samples| values. When |num_samples|
 // is not positive the function returns 0 instead of evaluating 0/0, which
 // would hand a NaN to the caller.
 float RMS(const int16_t* samples, int num_samples) {
   if (num_samples <= 0)
     return 0.0f;

   int64_t ssq_int64 = 0;
   int64_t sum_int64 = 0;
   for (int i = 0; i < num_samples; ++i) {
     const int64_t sample = samples[i];
     sum_int64 += sample;
     ssq_int64 += sample * sample;
   }
   // Convert to floating point only once the exact integer sums are known.
   const double mean = static_cast<double>(sum_int64) / num_samples;
   double variance =
       (static_cast<double>(ssq_int64) / num_samples) - (mean * mean);
   // Guard against a slightly negative difference caused by rounding, which
   // would make sqrt() return NaN.
   if (variance < 0.0)
     variance = 0.0;
   return static_cast<float>(sqrt(variance));
 }

 // Converts a duration in seconds to microseconds. 0.5 is added before the
 // truncating cast, so non-negative inputs round to the nearest microsecond.
 int64_t Secs2Usecs(float seconds) {
   const double usecs = 1.0e6 * seconds;
   return static_cast<int64_t>(0.5 + usecs);
 }

 // Converts a linear amplitude to decibels (20 * log10). Inputs that are not
 // above 1.0e-100 (zero and negative values) map to the sentinel -2000.
 float GetDecibel(float value) {
   const bool has_level = value > 1.0e-100;
   return has_level ? static_cast<float>(20 * log10(value)) : -2000.0f;
 }

 }  // namespace

 namespace content {

 // Stores threshold-crossing histories for making decisions about the speech
 // state. The history is a fixed-size ring of (timestamp, decision) pairs:
 // once the ring has been sized with SetRing(), every Insert() overwrites the
 // oldest slot.
 class EnergyEndpointer::HistoryRing {
  public:
   // Creates a ring with no slots; SetRing() must size it before use.
   HistoryRing() : insertion_index_(0) {}

   // Resets the ring to |size| elements, each with state |initial_state| and
   // a timestamp of -1, and restarts insertion at the first slot.
   void SetRing(int size, bool initial_state);

   // Inserts a new entry into the ring and drops the oldest entry.
   void Insert(int64_t time_us, bool decision);

   // Returns the time in microseconds of the most recently added entry.
   int64_t EndTime() const;

   // Returns the sum of all intervals during which 'decision' is true within
   // the most recent |duration_sec| seconds of history. The returned value is
   // in seconds.
   float RingSum(float duration_sec);

  private:
   // One history sample: when it was recorded and whether the frame was
   // judged to be above the decision threshold.
   struct DecisionPoint {
     int64_t time_us;
     bool decision;
   };

   std::vector<DecisionPoint> decision_points_;
   int insertion_index_;  // Index at which the next item gets added/inserted.

   DISALLOW_COPY_AND_ASSIGN(HistoryRing);
 };

 void EnergyEndpointer::HistoryRing::SetRing(int size, bool initial_state) {
   insertion_index_ = 0;
   decision_points_.clear();
   DecisionPoint init = { -1, initial_state };
   decision_points_.resize(static_cast<size_t>(size), init);
 }

 void EnergyEndpointer::HistoryRing::Insert(int64_t time_us, bool decision) {
   decision_points_[static_cast<size_t>(insertion_index_)].time_us = time_us;
   decision_points_[static_cast<size_t>(insertion_index_)].decision = decision;
   insertion_index_ =
       static_cast<int>((insertion_index_ + 1) % decision_points_.size());
 }

 // Returns the timestamp, in microseconds, of the slot written most recently
 // (the one just before the insertion index, wrapping around the ring).
 //
 // Returns -1 when the ring has no slots; -1 is also the timestamp SetRing()
 // gives to slots that were never written. Without this guard an empty ring
 // would be read at index size() - 1, which is out of bounds.
 int64_t EnergyEndpointer::HistoryRing::EndTime() const {
   if (decision_points_.empty())
     return -1;

   int ind = insertion_index_ - 1;
   if (ind < 0)
     ind = static_cast<int>(decision_points_.size() - 1);
   return decision_points_[static_cast<size_t>(ind)].time_us;
 }

 float EnergyEndpointer::HistoryRing::RingSum(float duration_sec) {
   if (decision_points_.empty())
     return 0.0;

   int64_t sum_us = 0;
   int ind = insertion_index_ - 1;
   if (ind < 0)
     ind = static_cast<int>(decision_points_.size() - 1);
   int64_t end_us = decision_points_[static_cast<size_t>(ind)].time_us;
   bool is_on = decision_points_[static_cast<size_t>(ind)].decision;
   int64_t start_us =
       end_us - static_cast<int64_t>(0.5 + (1.0e6 * duration_sec));
   if (start_us < 0)
     start_us = 0;
   size_t n_summed = 1;  // n points ==> (n-1) intervals
   while ((decision_points_[static_cast<size_t>(ind)].time_us > start_us) &&
          (n_summed < decision_points_.size())) {
     --ind;
     if (ind < 0)
       ind = static_cast<int>(decision_points_.size() - 1);
     if (is_on)
       sum_us += end_us - decision_points_[static_cast<size_t>(ind)].time_us;
     is_on = decision_points_[static_cast<size_t>(ind)].decision;
     end_us = decision_points_[static_cast<size_t>(ind)].time_us;
     n_summed++;
   }

   return 1.0e-6f * sum_us;  //  Returns total time that was super threshold.
 }

 // Builds an endpointer in the EP_PRE_SPEECH state with all levels, counters
 // and timestamps zeroed and a default maximum window of 4.0 (presumably
 // seconds -- confirm against the header). The HistoryRing created here has no
 // slots; it is only sized when Restart() runs, which Init(), StartSession()
 // and SetEnvironmentEstimationMode() all trigger.
 EnergyEndpointer::EnergyEndpointer()
     : status_(EP_PRE_SPEECH),
       offset_confirm_dur_sec_(0),
       endpointer_time_us_(0),
       fast_update_frames_(0),
       frame_counter_(0),
       max_window_dur_(4.0),
       sample_rate_(0),
       history_(new HistoryRing()),
       decision_threshold_(0),
       estimating_environment_(false),
       noise_level_(0),
       rms_adapt_(0),
       start_lag_(0),
       end_lag_(0),
       user_input_start_time_us_(0) {
 }

 // Nothing is released explicitly here. NOTE(review): history_ is presumably
 // held by an owning smart pointer declared in the header -- confirm, since it
 // is allocated with new in the constructor.
 EnergyEndpointer::~EnergyEndpointer() {
 }

 // Converts the duration |time| into a number of frames by dividing it by the
 // configured frame period; 0.5 is added before the truncating cast so that
 // non-negative results round to the nearest frame.
 int EnergyEndpointer::TimeToFrame(float time) const {
   return static_cast<int32_t>((time / params_.frame_period()) + 0.5);
 }

 // Returns the endpointer to the pre-speech state with an empty decision
 // history. When |reset_threshold| is true the adaptive levels and the frame
 // counter are also re-seeded from the configured decision threshold.
 void EnergyEndpointer::Restart(bool reset_threshold) {
   // Environment estimation is off by default (kept for backward
   // compatibility); it is only enabled while the user has not yet started
   // input, e.g. before the push-to-talk button is pressed.
   estimating_environment_ = false;
   user_input_start_time_us_ = 0;
   status_ = EP_PRE_SPEECH;

   // Size the history ring for the longest window and mark every slot as
   // non-speech.
   history_->SetRing(TimeToFrame(max_window_dur_), false);

   if (!reset_threshold)
     return;

   decision_threshold_ = params_.decision_threshold();
   rms_adapt_ = decision_threshold_;
   noise_level_ = params_.decision_threshold() / 2.0f;
   frame_counter_ = 0;  // Restarts the rapid initial update of levels.
 }

 // Adopts |params| and derives every dependent setting from them: the history
 // ring length, the offset confirmation duration, the number of fast-update
 // frames and the lag bounds computed from the fundamental-frequency limits.
 void EnergyEndpointer::Init(const EnergyEndpointerParams& params) {
   params_ = params;

   // The ring must cover the longest of the three history windows. NOTE: the
   // resulting slot count depends on the frame period in |params| being set
   // correctly by whoever built them.
   max_window_dur_ = params_.onset_window();
   if (max_window_dur_ < params_.speech_on_window())
     max_window_dur_ = params_.speech_on_window();
   if (max_window_dur_ < params_.offset_window())
     max_window_dur_ = params_.offset_window();
   Restart(true);

   // Lag bounds: sample rate divided by the highest and lowest fundamental
   // frequencies.
   sample_rate_ = params_.sample_rate();
   start_lag_ = static_cast<int>(sample_rate_ /
                                 params_.max_fundamental_frequency());
   end_lag_ = static_cast<int>(sample_rate_ /
                               params_.min_fundamental_frequency());

   // Number of initial frames during which levels are updated rapidly.
   fast_update_frames_ =
       static_cast<int64_t>(params_.fast_update_dur() / params_.frame_period());
   frame_counter_ = 0;

   // The initial noise level is inconsequential: the level of the first frame
   // overwrites it.
   noise_level_ = params_.decision_threshold() / 2.0f;

   // Not estimating the environment by default (backward compatibility); the
   // user has not started input yet.
   estimating_environment_ = false;
   user_input_start_time_us_ = 0;

   // Never let the offset confirmation duration go negative.
   offset_confirm_dur_sec_ = params_.offset_window() -
                             params_.offset_confirm_dur();
   if (offset_confirm_dur_sec_ < 0.0)
     offset_confirm_dur_sec_ = 0.0;
 }

 // Begins a new session by restarting the endpointer, including a reset of
 // the decision threshold, noise level and frame counter.
 void EnergyEndpointer::StartSession() {
   Restart(true);
 }

 // Marks the session as finished by moving to EP_POST_SPEECH. Frames processed
 // in that state only trigger the "Invalid case in switch" warning in
 // ProcessAudioFrame(); no transition out of it happens there.
 void EnergyEndpointer::EndSession() {
   status_ = EP_POST_SPEECH;
 }

 // Restarts the endpointer (resetting thresholds) and switches it to
 // environment-estimation mode, in which incoming frames only update the noise
 // level and decision threshold.
 void EnergyEndpointer::SetEnvironmentEstimationMode() {
   Restart(true);
   // Must follow Restart(), which always clears this flag.
   estimating_environment_ = true;
 }

 // Leaves environment-estimation mode and records the current endpointer time
 // as the start of user input; ProcessAudioFrame() measures the contamination
 // rejection period from that timestamp.
 void EnergyEndpointer::SetUserInputMode() {
   estimating_environment_ = false;
   user_input_start_time_us_ = endpointer_time_us_;
 }

 // Processes one frame of audio. The frame's RMS is computed and, unless the
 // endpointer is in environment-estimation mode, a speech/non-speech decision
 // is appended to the history, the state machine is advanced and the decision
 // threshold is adapted. The noise level and the frame counter are updated for
 // every frame.
 //   time_us:     timestamp of the frame; becomes the current endpointer time.
 //   samples:     the frame's 16-bit samples.
 //   num_samples: number of entries in |samples|.
 //   rms_out:     optional; when non-null it receives the frame RMS in dB.
 void EnergyEndpointer::ProcessAudioFrame(int64_t time_us,
                                          const int16_t* samples,
                                          int num_samples,
                                          float* rms_out) {
   endpointer_time_us_ = time_us;
   float rms = RMS(samples, num_samples);

   // Check that this is user input audio vs. pre-input adaptation audio.
   // Input audio starts when the user indicates start of input, by e.g.
   // pressing push-to-talk. Audio received prior to that is used to update
   // noise and speech level estimates.
   if (!estimating_environment_) {
     bool decision = false;
     // Frames inside the contamination rejection period that follows the
     // start of user input are never counted as speech.
     if ((endpointer_time_us_ - user_input_start_time_us_) <
         Secs2Usecs(params_.contamination_rejection_period())) {
       decision = false;
       DVLOG(1) << "decision: forced to false, time: " << endpointer_time_us_;
     } else {
       decision = (rms > decision_threshold_);
     }

     history_->Insert(endpointer_time_us_, decision);

     switch (status_) {
       case EP_PRE_SPEECH:
         // Enough above-threshold time in the onset window: possible onset.
         if (history_->RingSum(params_.onset_window()) >
             params_.onset_detect_dur()) {
           status_ = EP_POSSIBLE_ONSET;
         }
         break;

       case EP_POSSIBLE_ONSET: {
         float tsum = history_->RingSum(params_.onset_window());
         if (tsum > params_.onset_confirm_dur()) {
           status_ = EP_SPEECH_PRESENT;
         } else {  // If signal is not maintained, drop back to pre-speech.
           if (tsum <= params_.onset_detect_dur())
             status_ = EP_PRE_SPEECH;
         }
         break;
       }

       case EP_SPEECH_PRESENT: {
         // To induce hysteresis in the state residency, we allow a
         // smaller residency time in the on_ring, than was required to
         // enter the SPEECH_PRESENT state.
         float on_time = history_->RingSum(params_.speech_on_window());
         if (on_time < params_.on_maintain_dur())
           status_ = EP_POSSIBLE_OFFSET;
         break;
       }

       case EP_POSSIBLE_OFFSET:
         if (history_->RingSum(params_.offset_window()) <=
             offset_confirm_dur_sec_) {
           // Note that this offset time may be beyond the end
           // of the input buffer in a real-time system.  It will be up
           // to the RecognizerSession to decide what to do.
           status_ = EP_PRE_SPEECH;  // Automatically reset for next utterance.
         } else {  // If speech picks up again we allow return to SPEECH_PRESENT.
           if (history_->RingSum(params_.speech_on_window()) >=
               params_.on_maintain_dur())
             status_ = EP_SPEECH_PRESENT;
         }
         break;

       case EP_POST_SPEECH:
         // fall-through
         // (EndSession() sets this state; frames arriving afterwards only
         // produce the warning below and leave the state unchanged.)
       default:
         LOG(WARNING) << "Invalid case in switch: " << status_;
         break;
     }

     // If this is a quiet, non-speech region, slowly adapt the detection
     // threshold to be about 6dB above the average RMS.
     if ((!decision) && (status_ == EP_PRE_SPEECH)) {
       decision_threshold_ = (0.98f * decision_threshold_) + (0.02f * 2 * rms);
       rms_adapt_ = decision_threshold_;
     } else {
       // If this is in a speech region, adapt the decision threshold to
       // be about 10dB below the average RMS. If the noise level is high,
       // the threshold is pushed up.
       // Adaptation up to a higher level is 5 times faster than decay to
       // a lower level.
       if ((status_ == EP_SPEECH_PRESENT) && decision) {
         if (rms_adapt_ > rms) {
           rms_adapt_ = (0.99f * rms_adapt_) + (0.01f * rms);
         } else {
           rms_adapt_ = (0.95f * rms_adapt_) + (0.05f * rms);
         }
         float target_threshold = 0.3f * rms_adapt_ +  noise_level_;
         decision_threshold_ = (.90f * decision_threshold_) +
                               (0.10f * target_threshold);
       }
     }

     // Set a floor
     if (decision_threshold_ < params_.min_decision_threshold())
       decision_threshold_ = params_.min_decision_threshold();
   }

   // Update speech and noise levels.
   UpdateLevels(rms);
   ++frame_counter_;

   // The RMS is reported in decibels, not as a linear amplitude.
   if (rms_out)
     *rms_out = GetDecibel(rms);
 }

 // Returns the current noise level estimate converted to decibels by
 // GetDecibel() (which yields -2000 for a level that is not positive).
 float EnergyEndpointer::GetNoiseLevelDb() const {
   return GetDecibel(noise_level_);
 }

 // Folds the RMS of the latest frame into the noise level estimate and, while
 // estimating the environment or during the initial fast-update period,
 // re-derives the decision threshold from that estimate.
 void EnergyEndpointer::UpdateLevels(float rms) {
   const bool in_fast_update = frame_counter_ < fast_update_frames_;

   if (in_fast_update) {
     // Update quickly initially. We assume this is noise and that speech is
     // 6dB above the noise. Alpha increases from 0 to (k-1)/k where k is the
     // number of time steps in the initial adaptation period.
     const float alpha = static_cast<float>(frame_counter_) /
                         static_cast<float>(fast_update_frames_);
     noise_level_ = (alpha * noise_level_) + ((1 - alpha) * rms);
     DVLOG(1) << "FAST UPDATE, frame_counter_ " << frame_counter_
              << ", fast_update_frames_ " << fast_update_frames_;
   } else if (noise_level_ < rms) {
     // Louder than the estimate: the noise level creeps upward slowly.
     noise_level_ = (0.999f * noise_level_) + (0.001f * rms);
   } else {
     // At or below the estimate: the noise level follows downward quickly.
     noise_level_ = (0.95f * noise_level_) + (0.05f * rms);
   }

   if (!estimating_environment_ && !in_fast_update)
     return;

   decision_threshold_ = noise_level_ * 2;  // 6dB above noise level.
   // Set a floor
   if (decision_threshold_ < params_.min_decision_threshold())
     decision_threshold_ = params_.min_decision_threshold();
 }

 // Returns the current endpointer state.
 //   status_time: optional out-parameter; when non-null it receives the
 //                timestamp (microseconds) of the newest entry in the decision
 //                history. A null pointer is tolerated, matching how
 //                ProcessAudioFrame() treats its own optional |rms_out|,
 //                instead of being dereferenced unconditionally.
 EpStatus EnergyEndpointer::Status(int64_t* status_time) const {
   if (status_time)
     *status_time = history_->EndTime();
   return status_;
 }

 }  // namespace content
	// Copyright (c) 2012 The Chromium Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.
	//
	// To learn more about the algorithm used and the original code on which this
	// is based, see
	// https://wiki.corp.google.com/twiki/bin/view/Main/ChromeGoogleCodeXRef

	#include "content/browser/speech/endpointer/energy_endpointer.h"

	#include <math.h>
	#include <stddef.h>

	#include "base/logging.h"

	namespace {

	// Returns the mean-removed RMS of the input signal: sqrt(mean(x^2) - mean(x)^2),
	// i.e. the standard deviation of the samples, so a constant (DC) signal
	// yields 0.
	//
	// |samples| must point to at least |num_samples| values. When |num_samples|
	// is not positive the function returns 0 instead of evaluating 0/0, which
	// would hand a NaN to the caller.
	float RMS(const int16_t* samples, int num_samples) {
	  if (num_samples <= 0)
	    return 0.0f;

	  int64_t ssq_int64 = 0;
	  int64_t sum_int64 = 0;
	  for (int i = 0; i < num_samples; ++i) {
	    const int64_t sample = samples[i];
	    sum_int64 += sample;
	    ssq_int64 += sample * sample;
	  }
	  // Convert to floating point only once the exact integer sums are known.
	  const double mean = static_cast<double>(sum_int64) / num_samples;
	  double variance =
	      (static_cast<double>(ssq_int64) / num_samples) - (mean * mean);
	  // Guard against a slightly negative difference caused by rounding, which
	  // would make sqrt() return NaN.
	  if (variance < 0.0)
	    variance = 0.0;
	  return static_cast<float>(sqrt(variance));
	}

	// Converts a duration in seconds to microseconds. 0.5 is added before the
	// truncating cast, so non-negative inputs round to the nearest microsecond.
	int64_t Secs2Usecs(float seconds) {
	  const double usecs = 1.0e6 * seconds;
	  return static_cast<int64_t>(0.5 + usecs);
	}

	// Converts a linear amplitude to decibels (20 * log10). Inputs that are not
	// above 1.0e-100 (zero and negative values) map to the sentinel -2000.
	float GetDecibel(float value) {
	  const bool has_level = value > 1.0e-100;
	  return has_level ? static_cast<float>(20 * log10(value)) : -2000.0f;
	}

	} // namespace

	namespace content {

	// Stores threshold-crossing histories for making decisions about the speech
	// state. The history is a fixed-size ring of (timestamp, decision) pairs:
	// once the ring has been sized with SetRing(), every Insert() overwrites the
	// oldest slot.
	class EnergyEndpointer::HistoryRing {
	public:
	// Creates a ring with no slots; SetRing() must size it before use.
	HistoryRing() : insertion_index_(0) {}

	// Resets the ring to |size| elements, each with state |initial_state| and
	// a timestamp of -1, and restarts insertion at the first slot.
	void SetRing(int size, bool initial_state);

	// Inserts a new entry into the ring and drops the oldest entry.
	void Insert(int64_t time_us, bool decision);

	// Returns the time in microseconds of the most recently added entry.
	int64_t EndTime() const;

	// Returns the sum of all intervals during which 'decision' is true within
	// the most recent |duration_sec| seconds of history. The returned value is
	// in seconds.
	float RingSum(float duration_sec);

	private:
	// One history sample: when it was recorded and whether the frame was
	// judged to be above the decision threshold.
	struct DecisionPoint {
	int64_t time_us;
	bool decision;
	};

	std::vector<DecisionPoint> decision_points_;
	int insertion_index_; // Index at which the next item gets added/inserted.

	DISALLOW_COPY_AND_ASSIGN(HistoryRing);
	};

	void EnergyEndpointer::HistoryRing::SetRing(int size, bool initial_state) {
	insertion_index_ = 0;
	decision_points_.clear();
	DecisionPoint init = { -1, initial_state };
	decision_points_.resize(static_cast<size_t>(size), init);
	}

	void EnergyEndpointer::HistoryRing::Insert(int64_t time_us, bool decision) {
	decision_points_[static_cast<size_t>(insertion_index_)].time_us = time_us;
	decision_points_[static_cast<size_t>(insertion_index_)].decision = decision;
	insertion_index_ =
	static_cast<int>((insertion_index_ + 1) % decision_points_.size());
	}

	// Returns the timestamp, in microseconds, of the slot written most recently
	// (the one just before the insertion index, wrapping around the ring).
	//
	// Returns -1 when the ring has no slots; -1 is also the timestamp SetRing()
	// gives to slots that were never written. Without this guard an empty ring
	// would be read at index size() - 1, which is out of bounds.
	int64_t EnergyEndpointer::HistoryRing::EndTime() const {
	  if (decision_points_.empty())
	    return -1;

	  int ind = insertion_index_ - 1;
	  if (ind < 0)
	    ind = static_cast<int>(decision_points_.size() - 1);
	  return decision_points_[static_cast<size_t>(ind)].time_us;
	}

	float EnergyEndpointer::HistoryRing::RingSum(float duration_sec) {
	if (decision_points_.empty())
	return 0.0;

	int64_t sum_us = 0;
	int ind = insertion_index_ - 1;
	if (ind < 0)
	ind = static_cast<int>(decision_points_.size() - 1);
	int64_t end_us = decision_points_[static_cast<size_t>(ind)].time_us;
	bool is_on = decision_points_[static_cast<size_t>(ind)].decision;
	int64_t start_us =
	end_us - static_cast<int64_t>(0.5 + (1.0e6 * duration_sec));
	if (start_us < 0)
	start_us = 0;
	size_t n_summed = 1; // n points ==> (n-1) intervals
	while ((decision_points_[static_cast<size_t>(ind)].time_us > start_us) &&
	(n_summed < decision_points_.size())) {
	--ind;
	if (ind < 0)
	ind = static_cast<int>(decision_points_.size() - 1);
	if (is_on)
	sum_us += end_us - decision_points_[static_cast<size_t>(ind)].time_us;
	is_on = decision_points_[static_cast<size_t>(ind)].decision;
	end_us = decision_points_[static_cast<size_t>(ind)].time_us;
	n_summed++;
	}

	return 1.0e-6f * sum_us; // Returns total time that was super threshold.
	}

	// Builds an endpointer in the EP_PRE_SPEECH state with all levels, counters
	// and timestamps zeroed and a default maximum window of 4.0 (presumably
	// seconds -- confirm against the header). The HistoryRing created here has no
	// slots; it is only sized when Restart() runs, which Init(), StartSession()
	// and SetEnvironmentEstimationMode() all trigger.
	EnergyEndpointer::EnergyEndpointer()
	: status_(EP_PRE_SPEECH),
	offset_confirm_dur_sec_(0),
	endpointer_time_us_(0),
	fast_update_frames_(0),
	frame_counter_(0),
	max_window_dur_(4.0),
	sample_rate_(0),
	history_(new HistoryRing()),
	decision_threshold_(0),
	estimating_environment_(false),
	noise_level_(0),
	rms_adapt_(0),
	start_lag_(0),
	end_lag_(0),
	user_input_start_time_us_(0) {
	}

	// Nothing is released explicitly here. NOTE(review): history_ is presumably
	// held by an owning smart pointer declared in the header -- confirm, since it
	// is allocated with new in the constructor.
	EnergyEndpointer::~EnergyEndpointer() {
	}

	// Converts the duration |time| into a number of frames by dividing it by the
	// configured frame period; 0.5 is added before the truncating cast so that
	// non-negative results round to the nearest frame.
	int EnergyEndpointer::TimeToFrame(float time) const {
	  return static_cast<int32_t>((time / params_.frame_period()) + 0.5);
	}

	// Returns the endpointer to the pre-speech state with an empty decision
	// history. When |reset_threshold| is true the adaptive levels and the frame
	// counter are also re-seeded from the configured decision threshold.
	void EnergyEndpointer::Restart(bool reset_threshold) {
	  // Environment estimation is off by default (kept for backward
	  // compatibility); it is only enabled while the user has not yet started
	  // input, e.g. before the push-to-talk button is pressed.
	  estimating_environment_ = false;
	  user_input_start_time_us_ = 0;
	  status_ = EP_PRE_SPEECH;

	  // Size the history ring for the longest window and mark every slot as
	  // non-speech.
	  history_->SetRing(TimeToFrame(max_window_dur_), false);

	  if (!reset_threshold)
	    return;

	  decision_threshold_ = params_.decision_threshold();
	  rms_adapt_ = decision_threshold_;
	  noise_level_ = params_.decision_threshold() / 2.0f;
	  frame_counter_ = 0;  // Restarts the rapid initial update of levels.
	}

	// Adopts |params| and derives every dependent setting from them: the history
	// ring length, the offset confirmation duration, the number of fast-update
	// frames and the lag bounds computed from the fundamental-frequency limits.
	void EnergyEndpointer::Init(const EnergyEndpointerParams& params) {
	  params_ = params;

	  // The ring must cover the longest of the three history windows. NOTE: the
	  // resulting slot count depends on the frame period in |params| being set
	  // correctly by whoever built them.
	  max_window_dur_ = params_.onset_window();
	  if (max_window_dur_ < params_.speech_on_window())
	    max_window_dur_ = params_.speech_on_window();
	  if (max_window_dur_ < params_.offset_window())
	    max_window_dur_ = params_.offset_window();
	  Restart(true);

	  // Lag bounds: sample rate divided by the highest and lowest fundamental
	  // frequencies.
	  sample_rate_ = params_.sample_rate();
	  start_lag_ = static_cast<int>(sample_rate_ /
	                                params_.max_fundamental_frequency());
	  end_lag_ = static_cast<int>(sample_rate_ /
	                              params_.min_fundamental_frequency());

	  // Number of initial frames during which levels are updated rapidly.
	  fast_update_frames_ =
	      static_cast<int64_t>(params_.fast_update_dur() / params_.frame_period());
	  frame_counter_ = 0;

	  // The initial noise level is inconsequential: the level of the first frame
	  // overwrites it.
	  noise_level_ = params_.decision_threshold() / 2.0f;

	  // Not estimating the environment by default (backward compatibility); the
	  // user has not started input yet.
	  estimating_environment_ = false;
	  user_input_start_time_us_ = 0;

	  // Never let the offset confirmation duration go negative.
	  offset_confirm_dur_sec_ = params_.offset_window() -
	                            params_.offset_confirm_dur();
	  if (offset_confirm_dur_sec_ < 0.0)
	    offset_confirm_dur_sec_ = 0.0;
	}

	// Begins a new session by restarting the endpointer, including a reset of
	// the decision threshold, noise level and frame counter.
	void EnergyEndpointer::StartSession() {
	Restart(true);
	}

	// Marks the session as finished by moving to EP_POST_SPEECH. Frames processed
	// in that state only trigger the "Invalid case in switch" warning in
	// ProcessAudioFrame(); no transition out of it happens there.
	void EnergyEndpointer::EndSession() {
	status_ = EP_POST_SPEECH;
	}

	// Restarts the endpointer (resetting thresholds) and switches it to
	// environment-estimation mode, in which incoming frames only update the noise
	// level and decision threshold.
	void EnergyEndpointer::SetEnvironmentEstimationMode() {
	Restart(true);
	// Must follow Restart(), which always clears this flag.
	estimating_environment_ = true;
	}

	// Leaves environment-estimation mode and records the current endpointer time
	// as the start of user input; ProcessAudioFrame() measures the contamination
	// rejection period from that timestamp.
	void EnergyEndpointer::SetUserInputMode() {
	estimating_environment_ = false;
	user_input_start_time_us_ = endpointer_time_us_;
	}

	// Processes one frame of audio. The frame's RMS is computed and, unless the
	// endpointer is in environment-estimation mode, a speech/non-speech decision
	// is appended to the history, the state machine is advanced and the decision
	// threshold is adapted. The noise level and the frame counter are updated for
	// every frame.
	//   time_us:     timestamp of the frame; becomes the current endpointer time.
	//   samples:     the frame's 16-bit samples.
	//   num_samples: number of entries in |samples|.
	//   rms_out:     optional; when non-null it receives the frame RMS in dB.
	void EnergyEndpointer::ProcessAudioFrame(int64_t time_us,
	const int16_t* samples,
	int num_samples,
	float* rms_out) {
	endpointer_time_us_ = time_us;
	float rms = RMS(samples, num_samples);

	// Check that this is user input audio vs. pre-input adaptation audio.
	// Input audio starts when the user indicates start of input, by e.g.
	// pressing push-to-talk. Audio received prior to that is used to update
	// noise and speech level estimates.
	if (!estimating_environment_) {
	bool decision = false;
	// Frames inside the contamination rejection period that follows the
	// start of user input are never counted as speech.
	if ((endpointer_time_us_ - user_input_start_time_us_) <
	Secs2Usecs(params_.contamination_rejection_period())) {
	decision = false;
	DVLOG(1) << "decision: forced to false, time: " << endpointer_time_us_;
	} else {
	decision = (rms > decision_threshold_);
	}

	history_->Insert(endpointer_time_us_, decision);

	switch (status_) {
	case EP_PRE_SPEECH:
	// Enough above-threshold time in the onset window: possible onset.
	if (history_->RingSum(params_.onset_window()) >
	params_.onset_detect_dur()) {
	status_ = EP_POSSIBLE_ONSET;
	}
	break;

	case EP_POSSIBLE_ONSET: {
	float tsum = history_->RingSum(params_.onset_window());
	if (tsum > params_.onset_confirm_dur()) {
	status_ = EP_SPEECH_PRESENT;
	} else { // If signal is not maintained, drop back to pre-speech.
	if (tsum <= params_.onset_detect_dur())
	status_ = EP_PRE_SPEECH;
	}
	break;
	}

	case EP_SPEECH_PRESENT: {
	// To induce hysteresis in the state residency, we allow a
	// smaller residency time in the on_ring, than was required to
	// enter the SPEECH_PRESENT state.
	float on_time = history_->RingSum(params_.speech_on_window());
	if (on_time < params_.on_maintain_dur())
	status_ = EP_POSSIBLE_OFFSET;
	break;
	}

	case EP_POSSIBLE_OFFSET:
	if (history_->RingSum(params_.offset_window()) <=
	offset_confirm_dur_sec_) {
	// Note that this offset time may be beyond the end
	// of the input buffer in a real-time system. It will be up
	// to the RecognizerSession to decide what to do.
	status_ = EP_PRE_SPEECH; // Automatically reset for next utterance.
	} else { // If speech picks up again we allow return to SPEECH_PRESENT.
	if (history_->RingSum(params_.speech_on_window()) >=
	params_.on_maintain_dur())
	status_ = EP_SPEECH_PRESENT;
	}
	break;

	case EP_POST_SPEECH:
	// fall-through
	// (EndSession() sets this state; frames arriving afterwards only
	// produce the warning below and leave the state unchanged.)
	default:
	LOG(WARNING) << "Invalid case in switch: " << status_;
	break;
	}

	// If this is a quiet, non-speech region, slowly adapt the detection
	// threshold to be about 6dB above the average RMS.
	if ((!decision) && (status_ == EP_PRE_SPEECH)) {
	decision_threshold_ = (0.98f * decision_threshold_) + (0.02f * 2 * rms);
	rms_adapt_ = decision_threshold_;
	} else {
	// If this is in a speech region, adapt the decision threshold to
	// be about 10dB below the average RMS. If the noise level is high,
	// the threshold is pushed up.
	// Adaptation up to a higher level is 5 times faster than decay to
	// a lower level.
	if ((status_ == EP_SPEECH_PRESENT) && decision) {
	if (rms_adapt_ > rms) {
	rms_adapt_ = (0.99f * rms_adapt_) + (0.01f * rms);
	} else {
	rms_adapt_ = (0.95f * rms_adapt_) + (0.05f * rms);
	}
	float target_threshold = 0.3f * rms_adapt_ + noise_level_;
	decision_threshold_ = (.90f * decision_threshold_) +
	(0.10f * target_threshold);
	}
	}

	// Set a floor
	if (decision_threshold_ < params_.min_decision_threshold())
	decision_threshold_ = params_.min_decision_threshold();
	}

	// Update speech and noise levels.
	UpdateLevels(rms);
	++frame_counter_;

	// The RMS is reported in decibels, not as a linear amplitude.
	if (rms_out)
	*rms_out = GetDecibel(rms);
	}

	// Returns the current noise level estimate converted to decibels by
	// GetDecibel() (which yields -2000 for a level that is not positive).
	float EnergyEndpointer::GetNoiseLevelDb() const {
	return GetDecibel(noise_level_);
	}

	// Folds the RMS of the latest frame into the noise level estimate and, while
	// estimating the environment or during the initial fast-update period,
	// re-derives the decision threshold from that estimate.
	//
	// The condition guarding the threshold update uses a real logical OR; the
	// previous text contained the escaped sequence "\|\|", which is not valid
	// C++.
	void EnergyEndpointer::UpdateLevels(float rms) {
	  // Update quickly initially. We assume this is noise and that
	  // speech is 6dB above the noise.
	  if (frame_counter_ < fast_update_frames_) {
	    // Alpha increases from 0 to (k-1)/k where k is the number of time
	    // steps in the initial adaptation period.
	    float alpha = static_cast<float>(frame_counter_) /
	        static_cast<float>(fast_update_frames_);
	    noise_level_ = (alpha * noise_level_) + ((1 - alpha) * rms);
	    DVLOG(1) << "FAST UPDATE, frame_counter_ " << frame_counter_
	             << ", fast_update_frames_ " << fast_update_frames_;
	  } else {
	    // Update noise level. The noise level adapts quickly downward, but
	    // slowly upward. Besides feeding the threshold below, it is added to
	    // the speech-region target threshold in ProcessAudioFrame() and is
	    // reported through GetNoiseLevelDb().
	    if (noise_level_ < rms)
	      noise_level_ = (0.999f * noise_level_) + (0.001f * rms);
	    else
	      noise_level_ = (0.95f * noise_level_) + (0.05f * rms);
	  }
	  if (estimating_environment_ || (frame_counter_ < fast_update_frames_)) {
	    decision_threshold_ = noise_level_ * 2;  // 6dB above noise level.
	    // Set a floor
	    if (decision_threshold_ < params_.min_decision_threshold())
	      decision_threshold_ = params_.min_decision_threshold();
	  }
	}

	// Returns the current endpointer state.
	//   status_time: optional out-parameter; when non-null it receives the
	//                timestamp (microseconds) of the newest entry in the decision
	//                history. A null pointer is tolerated, matching how
	//                ProcessAudioFrame() treats its own optional |rms_out|,
	//                instead of being dereferenced unconditionally.
	EpStatus EnergyEndpointer::Status(int64_t* status_time) const {
	  if (status_time)
	    *status_time = history_->EndTime();
	  return status_;
	}

	} // namespace content