Import Cobalt 4.13245
diff --git a/src/cobalt/build/build.id b/src/cobalt/build/build.id
index b75476d..a658c30 100644
--- a/src/cobalt/build/build.id
+++ b/src/cobalt/build/build.id
@@ -1 +1 @@
-13153
\ No newline at end of file
+13245
\ No newline at end of file
diff --git a/src/cobalt/script/mozjs/mozjs_engine.cc b/src/cobalt/script/mozjs/mozjs_engine.cc
index a2870e8..0acb744 100644
--- a/src/cobalt/script/mozjs/mozjs_engine.cc
+++ b/src/cobalt/script/mozjs/mozjs_engine.cc
@@ -100,7 +100,7 @@
} // namespace
-MozjsEngine::MozjsEngine() {
+MozjsEngine::MozjsEngine() : accumulated_extra_memory_cost_(0) {
// TODO: Investigate the benefit of helper threads and things like
// parallel compilation.
runtime_ =
@@ -156,7 +156,14 @@
JS_GC(runtime_);
}
-void MozjsEngine::ReportExtraMemoryCost(size_t bytes) { NOTIMPLEMENTED(); }
+void MozjsEngine::ReportExtraMemoryCost(size_t bytes) {
+ DCHECK(thread_checker_.CalledOnValidThread());
+ accumulated_extra_memory_cost_ += bytes;
+ if (accumulated_extra_memory_cost_ > kGarbageCollectionThresholdBytes) {
+ accumulated_extra_memory_cost_ = 0;
+ CollectGarbage();
+ }
+}
size_t MozjsEngine::UpdateMemoryStatsAndReturnReserved() {
return EngineStats::GetInstance()->UpdateMemoryStatsAndReturnReserved();
@@ -182,6 +189,9 @@
void MozjsEngine::GCCallback(JSRuntime* runtime, JSGCStatus status) {
MozjsEngine* engine =
static_cast<MozjsEngine*>(JS_GetRuntimePrivate(runtime));
+ if (status == JSGC_END) {
+ engine->accumulated_extra_memory_cost_ = 0;
+ }
for (int i = 0; i < engine->contexts_.size(); ++i) {
MozjsGlobalEnvironment* global_environment =
MozjsGlobalEnvironment::GetFromContext(engine->contexts_[i]);
diff --git a/src/cobalt/script/mozjs/mozjs_engine.h b/src/cobalt/script/mozjs/mozjs_engine.h
index 2a86ce5..393f98d 100644
--- a/src/cobalt/script/mozjs/mozjs_engine.h
+++ b/src/cobalt/script/mozjs/mozjs_engine.h
@@ -51,6 +51,9 @@
// A list of all contexts created for this JSRuntime.
typedef std::vector<JSContext*> ContextVector;
ContextVector contexts_;
+
+ // The amount of externally allocated memory since last forced GC.
+ size_t accumulated_extra_memory_cost_;
};
} // namespace mozjs
} // namespace script
diff --git a/src/cobalt/speech/endpointer/endpointer.cc b/src/cobalt/speech/endpointer/endpointer.cc
new file mode 100644
index 0000000..b0c09e9
--- /dev/null
+++ b/src/cobalt/speech/endpointer/endpointer.cc
@@ -0,0 +1,187 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "cobalt/speech/endpointer/endpointer.h"
+
+#include "base/time.h"
+
+using base::Time;
+
+namespace {
+const int kFrameRate = 50; // 1 frame = 20ms of audio.
+}
+
+namespace cobalt {
+namespace speech {
+
+Endpointer::Endpointer(int sample_rate)
+ : speech_input_possibly_complete_silence_length_us_(-1),
+ speech_input_complete_silence_length_us_(-1),
+ audio_frame_time_us_(0),
+ sample_rate_(sample_rate),
+ frame_size_(0) {
+ Reset();
+
+ frame_size_ = static_cast<int>(sample_rate / static_cast<float>(kFrameRate));
+
+ speech_input_minimum_length_us_ =
+ static_cast<int64_t>(1.7 * Time::kMicrosecondsPerSecond);
+ speech_input_complete_silence_length_us_ =
+ static_cast<int64_t>(0.5 * Time::kMicrosecondsPerSecond);
+ long_speech_input_complete_silence_length_us_ = -1;
+ long_speech_length_us_ = -1;
+ speech_input_possibly_complete_silence_length_us_ =
+ 1 * Time::kMicrosecondsPerSecond;
+
+ // Set the default configuration for Push To Talk mode.
+ EnergyEndpointerParams ep_config;
+ ep_config.set_frame_period(1.0f / static_cast<float>(kFrameRate));
+ ep_config.set_frame_duration(1.0f / static_cast<float>(kFrameRate));
+ ep_config.set_endpoint_margin(0.2f);
+ ep_config.set_onset_window(0.15f);
+ ep_config.set_speech_on_window(0.4f);
+ ep_config.set_offset_window(0.15f);
+ ep_config.set_onset_detect_dur(0.09f);
+ ep_config.set_onset_confirm_dur(0.075f);
+ ep_config.set_on_maintain_dur(0.10f);
+ ep_config.set_offset_confirm_dur(0.12f);
+ ep_config.set_decision_threshold(1000.0f);
+ ep_config.set_min_decision_threshold(50.0f);
+ ep_config.set_fast_update_dur(0.2f);
+ ep_config.set_sample_rate(static_cast<float>(sample_rate));
+ ep_config.set_min_fundamental_frequency(57.143f);
+ ep_config.set_max_fundamental_frequency(400.0f);
+ ep_config.set_contamination_rejection_period(0.25f);
+ energy_endpointer_.Init(ep_config);
+}
+
+void Endpointer::Reset() {
+ old_ep_status_ = EP_PRE_SPEECH;
+ waiting_for_speech_possibly_complete_timeout_ = false;
+ waiting_for_speech_complete_timeout_ = false;
+ speech_previously_detected_ = false;
+ speech_input_complete_ = false;
+ audio_frame_time_us_ = 0; // Reset time for packets sent to endpointer.
+ speech_end_time_us_ = -1;
+ speech_start_time_us_ = -1;
+}
+
+void Endpointer::StartSession() {
+ Reset();
+ energy_endpointer_.StartSession();
+}
+
+void Endpointer::EndSession() {
+ energy_endpointer_.EndSession();
+}
+
+void Endpointer::SetEnvironmentEstimationMode() {
+ Reset();
+ energy_endpointer_.SetEnvironmentEstimationMode();
+}
+
+void Endpointer::SetUserInputMode() {
+ energy_endpointer_.SetUserInputMode();
+}
+
+EpStatus Endpointer::Status(int64_t* time) {
+ return energy_endpointer_.Status(time);
+}
+
+EpStatus Endpointer::ProcessAudio(
+ const ShellAudioBus& audio_bus, float* rms_out) {
+ DCHECK_EQ(audio_bus.channels(), 1);
+ const size_t num_samples = audio_bus.frames();
+ const int16_t* audio_data = NULL;
+
+ ShellAudioBus int16_audio_bus(1, num_samples, ShellAudioBus::kInt16,
+ ShellAudioBus::kInterleaved);
+
+ if (audio_bus.sample_type() == ShellAudioBus::kFloat32) {
+ int16_audio_bus.Assign(audio_bus);
+ DCHECK_EQ(int16_audio_bus.sample_type(), ShellAudioBus::kInt16);
+ audio_data =
+ reinterpret_cast<const int16_t*>(int16_audio_bus.interleaved_data());
+ } else {
+ DCHECK_EQ(audio_bus.sample_type(), ShellAudioBus::kInt16);
+ audio_data =
+ reinterpret_cast<const int16_t*>(audio_bus.interleaved_data());
+ }
+
+ EpStatus ep_status = EP_PRE_SPEECH;
+
+ // Process the input data in blocks of frame_size_, dropping any incomplete
+ // frames at the end (which is ok since typically the caller will be recording
+ // audio in multiples of our frame size).
+ int sample_index = 0;
+ while (static_cast<size_t>(sample_index + frame_size_) <= num_samples) {
+ // Have the endpointer process the frame.
+ energy_endpointer_.ProcessAudioFrame(audio_frame_time_us_,
+ audio_data + sample_index,
+ frame_size_,
+ rms_out);
+ sample_index += frame_size_;
+ audio_frame_time_us_ += (frame_size_ * Time::kMicrosecondsPerSecond) /
+ sample_rate_;
+
+ // Get the status of the endpointer.
+ int64_t ep_time;
+ ep_status = energy_endpointer_.Status(&ep_time);
+
+ // Handle state changes.
+ if ((EP_SPEECH_PRESENT == ep_status) &&
+ (EP_POSSIBLE_ONSET == old_ep_status_)) {
+ speech_end_time_us_ = -1;
+ waiting_for_speech_possibly_complete_timeout_ = false;
+ waiting_for_speech_complete_timeout_ = false;
+ // Trigger SpeechInputDidStart event on first detection.
+ if (false == speech_previously_detected_) {
+ speech_previously_detected_ = true;
+ speech_start_time_us_ = ep_time;
+ }
+ }
+ if ((EP_PRE_SPEECH == ep_status) &&
+ (EP_POSSIBLE_OFFSET == old_ep_status_)) {
+ speech_end_time_us_ = ep_time;
+ waiting_for_speech_possibly_complete_timeout_ = true;
+ waiting_for_speech_complete_timeout_ = true;
+ }
+ if (ep_time > speech_input_minimum_length_us_) {
+ // Speech possibly complete timeout.
+ if ((waiting_for_speech_possibly_complete_timeout_) &&
+ (ep_time - speech_end_time_us_ >
+ speech_input_possibly_complete_silence_length_us_)) {
+ waiting_for_speech_possibly_complete_timeout_ = false;
+ }
+ if (waiting_for_speech_complete_timeout_) {
+ // The length of the silence timeout period can be held constant, or it
+ // can be changed after a fixed amount of time from the beginning of
+ // speech.
+ bool has_stepped_silence =
+ (long_speech_length_us_ > 0) &&
+ (long_speech_input_complete_silence_length_us_ > 0);
+ int64_t requested_silence_length;
+ if (has_stepped_silence &&
+ (ep_time - speech_start_time_us_) > long_speech_length_us_) {
+ requested_silence_length =
+ long_speech_input_complete_silence_length_us_;
+ } else {
+ requested_silence_length =
+ speech_input_complete_silence_length_us_;
+ }
+
+ // Speech complete timeout.
+ if ((ep_time - speech_end_time_us_) > requested_silence_length) {
+ waiting_for_speech_complete_timeout_ = false;
+ speech_input_complete_ = true;
+ }
+ }
+ }
+ old_ep_status_ = ep_status;
+ }
+ return ep_status;
+}
+
+} // namespace speech
+} // namespace cobalt
diff --git a/src/cobalt/speech/endpointer/endpointer.h b/src/cobalt/speech/endpointer/endpointer.h
new file mode 100644
index 0000000..b8d46f5
--- /dev/null
+++ b/src/cobalt/speech/endpointer/endpointer.h
@@ -0,0 +1,156 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COBALT_SPEECH_ENDPOINTER_ENDPOINTER_H_
+#define COBALT_SPEECH_ENDPOINTER_ENDPOINTER_H_
+
+#include <stdint.h>
+
+#include "cobalt/speech/endpointer/energy_endpointer.h"
+#include "media/base/shell_audio_bus.h"
+
+class EpStatus;
+
+namespace cobalt {
+namespace speech {
+
+// A simple interface to the underlying energy-endpointer implementation, this
+// class lets callers provide audio as being recorded and let them poll to find
+// when the user has stopped speaking.
+//
+// There are two events that may trigger the end of speech:
+//
+// speechInputPossiblyComplete event:
+//
+// Signals that silence/noise has been detected for a *short* amount of
+// time after some speech has been detected. It can be used for low latency
+// UI feedback. To disable it, set it to a large amount.
+//
+// speechInputComplete event:
+//
+// This event is intended to signal end of input and to stop recording.
+// The amount of time to wait after speech is set by
+// speech_input_complete_silence_length_ and optionally two other
+// parameters (see below).
+// This time can be held constant, or can change as more speech is detected.
+// In the latter case, the time changes after a set amount of time from the
+// *beginning* of speech. This is motivated by the expectation that there
+// will be two distinct types of inputs: short search queries and longer
+// dictation style input.
+//
+// Three parameters are used to define the piecewise constant timeout function.
+// The timeout length is speech_input_complete_silence_length until
+// long_speech_length, when it changes to
+// long_speech_input_complete_silence_length.
+class Endpointer {
+ public:
+ typedef ::media::ShellAudioBus ShellAudioBus;
+
+ explicit Endpointer(int sample_rate);
+
+ // Start the endpointer. This should be called at the beginning of a session.
+ void StartSession();
+
+ // Stop the endpointer.
+ void EndSession();
+
+ // Start environment estimation. Audio will be used for environment estimation
+ // i.e. noise level estimation.
+ void SetEnvironmentEstimationMode();
+
+ // Start user input. This should be called when the user indicates start of
+ // input, e.g. by pressing a button.
+ void SetUserInputMode();
+
+ // Process a segment of audio, which may be more than one frame.
+ // The status of the last frame will be returned.
+ EpStatus ProcessAudio(const ShellAudioBus& audio_bus, float* rms_out);
+
+ // Get the status of the endpointer.
+ EpStatus Status(int64_t* time_us);
+
+ // Returns true if the endpointer detected reasonable audio levels above
+ // background noise which could be user speech, false if not.
+ bool DidStartReceivingSpeech() const {
+ return speech_previously_detected_;
+ }
+
+ bool IsEstimatingEnvironment() const {
+ return energy_endpointer_.estimating_environment();
+ }
+
+ void set_speech_input_complete_silence_length(int64_t time_us) {
+ speech_input_complete_silence_length_us_ = time_us;
+ }
+
+ void set_long_speech_input_complete_silence_length(int64_t time_us) {
+ long_speech_input_complete_silence_length_us_ = time_us;
+ }
+
+ void set_speech_input_possibly_complete_silence_length(int64_t time_us) {
+ speech_input_possibly_complete_silence_length_us_ = time_us;
+ }
+
+ void set_long_speech_length(int64_t time_us) {
+ long_speech_length_us_ = time_us;
+ }
+
+ bool speech_input_complete() const {
+ return speech_input_complete_;
+ }
+
+ // RMS background noise level in dB.
+ float NoiseLevelDb() const { return energy_endpointer_.GetNoiseLevelDb(); }
+
+ private:
+ // Reset internal states. Helper method common to initial input utterance
+ // and following input utternaces.
+ void Reset();
+
+ // Minimum allowable length of speech input.
+ int64_t speech_input_minimum_length_us_;
+
+ // The speechInputPossiblyComplete event signals that silence/noise has been
+ // detected for a *short* amount of time after some speech has been detected.
+ // This proporty specifies the time period.
+ int64_t speech_input_possibly_complete_silence_length_us_;
+
+ // The speechInputComplete event signals that silence/noise has been
+ // detected for a *long* amount of time after some speech has been detected.
+ // This property specifies the time period.
+ int64_t speech_input_complete_silence_length_us_;
+
+ // Same as above, this specifies the required silence period after speech
+ // detection. This period is used instead of
+ // speech_input_complete_silence_length_ when the utterance is longer than
+ // long_speech_length_. This parameter is optional.
+ int64_t long_speech_input_complete_silence_length_us_;
+
+ // The period of time after which the endpointer should consider
+ // long_speech_input_complete_silence_length_ as a valid silence period
+ // instead of speech_input_complete_silence_length_. This parameter is
+ // optional.
+ int64_t long_speech_length_us_;
+
+ // First speech onset time, used in determination of speech complete timeout.
+ int64_t speech_start_time_us_;
+
+ // Most recent end time, used in determination of speech complete timeout.
+ int64_t speech_end_time_us_;
+
+ int64_t audio_frame_time_us_;
+ EpStatus old_ep_status_;
+ bool waiting_for_speech_possibly_complete_timeout_;
+ bool waiting_for_speech_complete_timeout_;
+ bool speech_previously_detected_;
+ bool speech_input_complete_;
+ EnergyEndpointer energy_endpointer_;
+ int sample_rate_;
+ int32_t frame_size_;
+};
+
+} // namespace speech
+} // namespace cobalt
+
+#endif // COBALT_SPEECH_ENDPOINTER_ENDPOINTER_H_
diff --git a/src/cobalt/speech/endpointer/endpointer_unittest.cc b/src/cobalt/speech/endpointer/endpointer_unittest.cc
new file mode 100644
index 0000000..29465d5
--- /dev/null
+++ b/src/cobalt/speech/endpointer/endpointer_unittest.cc
@@ -0,0 +1,163 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <stdint.h>
+
+#include "cobalt/speech/endpointer/endpointer.h"
+#include "media/base/shell_audio_bus.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace {
+const int kFrameRate = 50; // 20 ms long frames for AMR encoding.
+const int kSampleRate = 8000; // 8 k samples per second for AMR encoding.
+
+// At 8 sample per second a 20 ms frame is 160 samples, which corrsponds
+// to the AMR codec.
+const int kFrameSize = kSampleRate / kFrameRate; // 160 samples.
+
+#if defined(OS_STARBOARD)
+SB_COMPILE_ASSERT(kFrameSize == 160, invalid_frame_size);
+#else
+COMPILE_ASSERT(kFrameSize == 160, invalid_frame_size);
+#endif // defined(OS_STARBOARD)
+} // namespace
+
+namespace cobalt {
+namespace speech {
+
+class FrameProcessor {
+ public:
+ // Process a single frame of test audio samples.
+ virtual EpStatus ProcessFrame(int64_t time,
+ int16_t* samples,
+ int frame_size) = 0;
+};
+
+void RunEndpointerEventsTest(FrameProcessor* processor) {
+ int16_t samples[kFrameSize];
+
+ // We will create a white noise signal of 150 frames. The frames from 50 to
+ // 100 will have more power, and the endpointer should fire on those frames.
+ const int kNumFrames = 150;
+
+ // Create a random sequence of samples.
+ srand(1);
+ float gain = 0.0;
+ int64_t time = 0;
+ for (int frame_count = 0; frame_count < kNumFrames; ++frame_count) {
+ // The frames from 50 to 100 will have more power, and the endpointer
+ // should detect those frames as speech.
+ if ((frame_count >= 50) && (frame_count < 100)) {
+ gain = 2000.0;
+ } else {
+ gain = 1.0;
+ }
+ // Create random samples.
+ for (int i = 0; i < kFrameSize; ++i) {
+ float randNum = static_cast<float>(rand() - (RAND_MAX / 2)) /
+ static_cast<float>(RAND_MAX);
+ samples[i] = static_cast<int16_t>(gain * randNum);
+ }
+
+ EpStatus ep_status = processor->ProcessFrame(time, samples, kFrameSize);
+ time += static_cast<int64_t>(kFrameSize * (1e6 / kSampleRate));
+
+ // Log the status.
+ if (20 == frame_count)
+ EXPECT_EQ(EP_PRE_SPEECH, ep_status);
+ if (70 == frame_count)
+ EXPECT_EQ(EP_SPEECH_PRESENT, ep_status);
+ if (120 == frame_count)
+ EXPECT_EQ(EP_PRE_SPEECH, ep_status);
+ }
+}
+
+// This test instantiates and initializes a stand alone endpointer module.
+// The test creates FrameData objects with random noise and send them
+// to the endointer module. The energy of the first 50 frames is low,
+// followed by 500 high energy frames, and another 50 low energy frames.
+// We test that the correct start and end frames were detected.
+class EnergyEndpointerFrameProcessor : public FrameProcessor {
+ public:
+ explicit EnergyEndpointerFrameProcessor(EnergyEndpointer* endpointer)
+ : endpointer_(endpointer) {}
+
+ EpStatus ProcessFrame(int64_t time,
+ int16_t* samples,
+ int /*frame_size*/) override {
+ endpointer_->ProcessAudioFrame(time, samples, kFrameSize, NULL);
+
+ int64_t ep_time;
+ return endpointer_->Status(&ep_time);
+ }
+
+ private:
+ EnergyEndpointer* endpointer_;
+};
+
+TEST(EndpointerTest, TestEnergyEndpointerEvents) {
+ // Initialize endpointer and configure it. We specify the parameters
+ // here for a 20ms window, and a 20ms step size, which corrsponds to
+ // the narrow band AMR codec.
+ EnergyEndpointerParams ep_config;
+ ep_config.set_frame_period(1.0f / static_cast<float>(kFrameRate));
+ ep_config.set_frame_duration(1.0f / static_cast<float>(kFrameRate));
+ ep_config.set_endpoint_margin(0.2f);
+ ep_config.set_onset_window(0.15f);
+ ep_config.set_speech_on_window(0.4f);
+ ep_config.set_offset_window(0.15f);
+ ep_config.set_onset_detect_dur(0.09f);
+ ep_config.set_onset_confirm_dur(0.075f);
+ ep_config.set_on_maintain_dur(0.10f);
+ ep_config.set_offset_confirm_dur(0.12f);
+ ep_config.set_decision_threshold(100.0f);
+ EnergyEndpointer endpointer;
+ endpointer.Init(ep_config);
+
+ endpointer.StartSession();
+
+ EnergyEndpointerFrameProcessor frame_processor(&endpointer);
+ RunEndpointerEventsTest(&frame_processor);
+
+ endpointer.EndSession();
+}
+
+// Test endpointer wrapper class.
+class EndpointerFrameProcessor : public FrameProcessor {
+ public:
+ typedef ::media::ShellAudioBus ShellAudioBus;
+ explicit EndpointerFrameProcessor(Endpointer* endpointer)
+ : endpointer_(endpointer) {}
+
+ EpStatus ProcessFrame(int64_t /*time*/,
+ int16_t* samples,
+ int /*frame_size*/) override {
+ scoped_ptr<ShellAudioBus> frame(new ShellAudioBus(1, kFrameSize, samples));
+ endpointer_->ProcessAudio(*frame, NULL);
+
+ int64_t ep_time;
+ return endpointer_->Status(&ep_time);
+ }
+
+ private:
+ Endpointer* endpointer_;
+};
+
+TEST(EndpointerTest, TestEmbeddedEndpointerEvents) {
+ Endpointer endpointer(kSampleRate);
+ const int64_t kMillisecondsPerMicrosecond = 1000;
+ const int64_t short_timeout = 300 * kMillisecondsPerMicrosecond;
+ endpointer.set_speech_input_possibly_complete_silence_length(short_timeout);
+ const int64_t long_timeout = 500 * kMillisecondsPerMicrosecond;
+ endpointer.set_speech_input_complete_silence_length(long_timeout);
+ endpointer.StartSession();
+
+ EndpointerFrameProcessor frame_processor(&endpointer);
+ RunEndpointerEventsTest(&frame_processor);
+
+ endpointer.EndSession();
+}
+
+} // namespace speech
+} // namespace cobalt
diff --git a/src/cobalt/speech/endpointer/energy_endpointer.cc b/src/cobalt/speech/endpointer/energy_endpointer.cc
new file mode 100644
index 0000000..bd7ca80
--- /dev/null
+++ b/src/cobalt/speech/endpointer/energy_endpointer.cc
@@ -0,0 +1,379 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "cobalt/speech/endpointer/energy_endpointer.h"
+
+#include <math.h>
+#include <stddef.h>
+
+#include "base/logging.h"
+
+namespace {
+
+// Returns the RMS (quadratic mean) of the input signal.
+float RMS(const int16_t* samples, int num_samples) {
+ int64_t ssq_int64 = 0;
+ int64_t sum_int64 = 0;
+ for (int i = 0; i < num_samples; ++i) {
+ sum_int64 += samples[i];
+ ssq_int64 += samples[i] * samples[i];
+ }
+ // now convert to floats.
+ double sum = static_cast<double>(sum_int64);
+ sum /= num_samples;
+ double ssq = static_cast<double>(ssq_int64);
+ return static_cast<float>(sqrt((ssq / num_samples) - (sum * sum)));
+}
+
+int64_t Secs2Usecs(float seconds) {
+ return static_cast<int64_t>(0.5 + (1.0e6 * seconds));
+}
+
+float GetDecibel(float value) {
+ if (value > 1.0e-100)
+ return static_cast<float>(20 * log10(value));
+ return -2000.0;
+}
+
+} // namespace
+
+namespace cobalt {
+namespace speech {
+
+// Stores threshold-crossing histories for making decisions about the speech
+// state.
+class EnergyEndpointer::HistoryRing {
+ public:
+ HistoryRing() : insertion_index_(0) {}
+
+ // Resets the ring to |size| elements each with state |initial_state|
+ void SetRing(int size, bool initial_state);
+
+ // Inserts a new entry into the ring and drops the oldest entry.
+ void Insert(int64_t time_us, bool decision);
+
+ // Returns the time in microseconds of the most recently added entry.
+ int64_t EndTime() const;
+
+ // Returns the sum of all intervals during which 'decision' is true within
+ // the time in seconds specified by 'duration'. The returned interval is
+ // in seconds.
+ float RingSum(float duration_sec);
+
+ private:
+ struct DecisionPoint {
+ int64_t time_us;
+ bool decision;
+ };
+
+ std::vector<DecisionPoint> decision_points_;
+ int insertion_index_; // Index at which the next item gets added/inserted.
+
+ DISALLOW_COPY_AND_ASSIGN(HistoryRing);
+};
+
+void EnergyEndpointer::HistoryRing::SetRing(int size, bool initial_state) {
+ insertion_index_ = 0;
+ decision_points_.clear();
+ DecisionPoint init = { -1, initial_state };
+ decision_points_.resize(static_cast<size_t>(size), init);
+}
+
+void EnergyEndpointer::HistoryRing::Insert(int64_t time_us, bool decision) {
+ decision_points_[static_cast<size_t>(insertion_index_)].time_us = time_us;
+ decision_points_[static_cast<size_t>(insertion_index_)].decision = decision;
+ insertion_index_ =
+ static_cast<int>((insertion_index_ + 1) % decision_points_.size());
+}
+
+int64_t EnergyEndpointer::HistoryRing::EndTime() const {
+ int ind = insertion_index_ - 1;
+ if (ind < 0)
+ ind = static_cast<int>(decision_points_.size() - 1);
+ return decision_points_[static_cast<size_t>(ind)].time_us;
+}
+
+float EnergyEndpointer::HistoryRing::RingSum(float duration_sec) {
+ if (decision_points_.empty())
+ return 0.0;
+
+ int64_t sum_us = 0;
+ int ind = insertion_index_ - 1;
+ if (ind < 0)
+ ind = static_cast<int>(decision_points_.size() - 1);
+ int64_t end_us = decision_points_[static_cast<size_t>(ind)].time_us;
+ bool is_on = decision_points_[static_cast<size_t>(ind)].decision;
+ int64_t start_us =
+ end_us - static_cast<int64_t>(0.5 + (1.0e6 * duration_sec));
+ if (start_us < 0)
+ start_us = 0;
+ size_t n_summed = 1; // n points ==> (n-1) intervals
+ while ((decision_points_[static_cast<size_t>(ind)].time_us > start_us) &&
+ (n_summed < decision_points_.size())) {
+ --ind;
+ if (ind < 0)
+ ind = static_cast<int>(decision_points_.size() - 1);
+ if (is_on)
+ sum_us += end_us - decision_points_[static_cast<size_t>(ind)].time_us;
+ is_on = decision_points_[static_cast<size_t>(ind)].decision;
+ end_us = decision_points_[static_cast<size_t>(ind)].time_us;
+ n_summed++;
+ }
+
+ return 1.0e-6f * sum_us; // Returns total time that was super threshold.
+}
+
+EnergyEndpointer::EnergyEndpointer()
+ : status_(EP_PRE_SPEECH),
+ offset_confirm_dur_sec_(0),
+ endpointer_time_us_(0),
+ fast_update_frames_(0),
+ frame_counter_(0),
+ max_window_dur_(4.0),
+ sample_rate_(0),
+ history_(new HistoryRing()),
+ decision_threshold_(0),
+ estimating_environment_(false),
+ noise_level_(0),
+ rms_adapt_(0),
+ start_lag_(0),
+ end_lag_(0),
+ user_input_start_time_us_(0) {
+}
+
+EnergyEndpointer::~EnergyEndpointer() {
+}
+
+int EnergyEndpointer::TimeToFrame(float time) const {
+ return static_cast<int32_t>(0.5 + (time / params_.frame_period()));
+}
+
+void EnergyEndpointer::Restart(bool reset_threshold) {
+ status_ = EP_PRE_SPEECH;
+ user_input_start_time_us_ = 0;
+
+ if (reset_threshold) {
+ decision_threshold_ = params_.decision_threshold();
+ rms_adapt_ = decision_threshold_;
+ noise_level_ = params_.decision_threshold() / 2.0f;
+ frame_counter_ = 0; // Used for rapid initial update of levels.
+ }
+
+ // Set up the memories to hold the history windows.
+ history_->SetRing(TimeToFrame(max_window_dur_), false);
+
+ // Flag that indicates that current input should be used for
+ // estimating the environment. The user has not yet started input
+ // by e.g. pressed the push-to-talk button. By default, this is
+ // false for backward compatibility.
+ estimating_environment_ = false;
+}
+
+void EnergyEndpointer::Init(const EnergyEndpointerParams& params) {
+ params_ = params;
+
+ // Find the longest history interval to be used, and make the ring
+ // large enough to accommodate that number of frames. NOTE: This
+ // depends upon ep_frame_period being set correctly in the factory
+ // that did this instantiation.
+ max_window_dur_ = params_.onset_window();
+ if (params_.speech_on_window() > max_window_dur_)
+ max_window_dur_ = params_.speech_on_window();
+ if (params_.offset_window() > max_window_dur_)
+ max_window_dur_ = params_.offset_window();
+ Restart(true);
+
+ offset_confirm_dur_sec_ = params_.offset_window() -
+ params_.offset_confirm_dur();
+ if (offset_confirm_dur_sec_ < 0.0)
+ offset_confirm_dur_sec_ = 0.0;
+
+ user_input_start_time_us_ = 0;
+
+ // Flag that indicates that current input should be used for
+ // estimating the environment. The user has not yet started input
+ // by e.g. pressed the push-to-talk button. By default, this is
+ // false for backward compatibility.
+ estimating_environment_ = false;
+ // The initial value of the noise and speech levels is inconsequential.
+ // The level of the first frame will overwrite these values.
+ noise_level_ = params_.decision_threshold() / 2.0f;
+ fast_update_frames_ =
+ static_cast<int64_t>(params_.fast_update_dur() / params_.frame_period());
+
+ frame_counter_ = 0; // Used for rapid initial update of levels.
+
+ sample_rate_ = params_.sample_rate();
+ start_lag_ = static_cast<int>(sample_rate_ /
+ params_.max_fundamental_frequency());
+ end_lag_ = static_cast<int>(sample_rate_ /
+ params_.min_fundamental_frequency());
+}
+
+void EnergyEndpointer::StartSession() {
+ Restart(true);
+}
+
+void EnergyEndpointer::EndSession() {
+ status_ = EP_POST_SPEECH;
+}
+
+void EnergyEndpointer::SetEnvironmentEstimationMode() {
+ Restart(true);
+ estimating_environment_ = true;
+}
+
+void EnergyEndpointer::SetUserInputMode() {
+ estimating_environment_ = false;
+ user_input_start_time_us_ = endpointer_time_us_;
+}
+
+void EnergyEndpointer::ProcessAudioFrame(int64_t time_us,
+ const int16_t* samples,
+ int num_samples,
+ float* rms_out) {
+ endpointer_time_us_ = time_us;
+ float rms = RMS(samples, num_samples);
+
+ // Check that this is user input audio vs. pre-input adaptation audio.
+ // Input audio starts when the user indicates start of input, by e.g.
+ // pressing push-to-talk. Audio received prior to that is used to update
+ // noise and speech level estimates.
+ if (!estimating_environment_) {
+ bool decision = false;
+ if ((endpointer_time_us_ - user_input_start_time_us_) <
+ Secs2Usecs(params_.contamination_rejection_period())) {
+ decision = false;
+ DVLOG(1) << "decision: forced to false, time: " << endpointer_time_us_;
+ } else {
+ decision = (rms > decision_threshold_);
+ }
+
+ history_->Insert(endpointer_time_us_, decision);
+
+ switch (status_) {
+ case EP_PRE_SPEECH:
+ if (history_->RingSum(params_.onset_window()) >
+ params_.onset_detect_dur()) {
+ status_ = EP_POSSIBLE_ONSET;
+ }
+ break;
+
+ case EP_POSSIBLE_ONSET: {
+ float tsum = history_->RingSum(params_.onset_window());
+ if (tsum > params_.onset_confirm_dur()) {
+ status_ = EP_SPEECH_PRESENT;
+ } else { // If signal is not maintained, drop back to pre-speech.
+ if (tsum <= params_.onset_detect_dur())
+ status_ = EP_PRE_SPEECH;
+ }
+ break;
+ }
+
+ case EP_SPEECH_PRESENT: {
+ // To induce hysteresis in the state residency, we allow a
+ // smaller residency time in the on_ring, than was required to
+ // enter the SPEECH_PERSENT state.
+ float on_time = history_->RingSum(params_.speech_on_window());
+ if (on_time < params_.on_maintain_dur())
+ status_ = EP_POSSIBLE_OFFSET;
+ break;
+ }
+
+ case EP_POSSIBLE_OFFSET:
+ if (history_->RingSum(params_.offset_window()) <=
+ offset_confirm_dur_sec_) {
+ // Note that this offset time may be beyond the end
+ // of the input buffer in a real-time system. It will be up
+ // to the RecognizerSession to decide what to do.
+ status_ = EP_PRE_SPEECH; // Automatically reset for next utterance.
+ } else { // If speech picks up again we allow return to SPEECH_PRESENT.
+ if (history_->RingSum(params_.speech_on_window()) >=
+ params_.on_maintain_dur())
+ status_ = EP_SPEECH_PRESENT;
+ }
+ break;
+
+ case EP_POST_SPEECH:
+ // fall-through
+ default:
+ LOG(WARNING) << "Invalid case in switch: " << status_;
+ break;
+ }
+
+ // If this is a quiet, non-speech region, slowly adapt the detection
+ // threshold to be about 6dB above the average RMS.
+ if ((!decision) && (status_ == EP_PRE_SPEECH)) {
+ decision_threshold_ = (0.98f * decision_threshold_) + (0.02f * 2 * rms);
+ rms_adapt_ = decision_threshold_;
+ } else {
+ // If this is in a speech region, adapt the decision threshold to
+ // be about 10dB below the average RMS. If the noise level is high,
+ // the threshold is pushed up.
+ // Adaptation up to a higher level is 5 times faster than decay to
+ // a lower level.
+ if ((status_ == EP_SPEECH_PRESENT) && decision) {
+ if (rms_adapt_ > rms) {
+ rms_adapt_ = (0.99f * rms_adapt_) + (0.01f * rms);
+ } else {
+ rms_adapt_ = (0.95f * rms_adapt_) + (0.05f * rms);
+ }
+ float target_threshold = 0.3f * rms_adapt_ + noise_level_;
+ decision_threshold_ = (.90f * decision_threshold_) +
+ (0.10f * target_threshold);
+ }
+ }
+
+ // Set a floor
+ if (decision_threshold_ < params_.min_decision_threshold())
+ decision_threshold_ = params_.min_decision_threshold();
+ }
+
+ // Update speech and noise levels.
+ UpdateLevels(rms);
+ ++frame_counter_;
+
+ if (rms_out)
+ *rms_out = GetDecibel(rms);
+}
+
+float EnergyEndpointer::GetNoiseLevelDb() const {
+ return GetDecibel(noise_level_);
+}
+
+void EnergyEndpointer::UpdateLevels(float rms) {
+ // Update quickly initially. We assume this is noise and that
+ // speech is 6dB above the noise.
+ if (frame_counter_ < fast_update_frames_) {
+ // Alpha increases from 0 to (k-1)/k where k is the number of time
+ // steps in the initial adaptation period.
+ float alpha = static_cast<float>(frame_counter_) /
+ static_cast<float>(fast_update_frames_);
+ noise_level_ = (alpha * noise_level_) + ((1 - alpha) * rms);
+ DVLOG(1) << "FAST UPDATE, frame_counter_ " << frame_counter_
+ << ", fast_update_frames_ " << fast_update_frames_;
+ } else {
+ // Update Noise level. The noise level adapts quickly downward, but
+ // slowly upward. The noise_level_ parameter is not currently used
+ // for threshold adaptation. It is used for UI feedback.
+ if (noise_level_ < rms)
+ noise_level_ = (0.999f * noise_level_) + (0.001f * rms);
+ else
+ noise_level_ = (0.95f * noise_level_) + (0.05f * rms);
+ }
+ if (estimating_environment_ || (frame_counter_ < fast_update_frames_)) {
+ decision_threshold_ = noise_level_ * 2; // 6dB above noise level.
+ // Set a floor
+ if (decision_threshold_ < params_.min_decision_threshold())
+ decision_threshold_ = params_.min_decision_threshold();
+ }
+}
+
+EpStatus EnergyEndpointer::Status(int64_t* status_time) const {
+ *status_time = history_->EndTime();
+ return status_;
+}
+
+} // namespace speech
+} // namespace cobalt
diff --git a/src/cobalt/speech/endpointer/energy_endpointer.h b/src/cobalt/speech/endpointer/energy_endpointer.h
new file mode 100644
index 0000000..d3822ea
--- /dev/null
+++ b/src/cobalt/speech/endpointer/energy_endpointer.h
@@ -0,0 +1,162 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// The EnergyEndpointer class finds likely speech onset and offset points.
+//
+// The implementation described here is about the simplest possible.
+// It is based on timings of threshold crossings for overall signal
+// RMS. It is suitable for light weight applications.
+//
+// As written, the basic idea is that one specifies intervals that
+// must be occupied by super- and sub-threshold energy levels, and
+// defers decisions re onset and offset times until these
+// specifications have been met. Three basic intervals are tested: an
+// onset window, a speech-on window, and an offset window. We require
+// super-threshold to exceed some mimimum total durations in the onset
+// and speech-on windows before declaring the speech onset time, and
+// we specify a required sub-threshold residency in the offset window
+// before declaring speech offset. As the various residency requirements are
+// met, the EnergyEndpointer instance assumes various states, and can return the
+// ID of these states to the client (see EpStatus below).
+//
+// The levels of the speech and background noise are continuously updated. It is
+// important that the background noise level be estimated initially for
+// robustness in noisy conditions. The first frames are assumed to be background
+// noise and a fast update rate is used for the noise level. The duration for
+// fast update is controlled by the fast_update_dur_ paramter.
+//
+// If used in noisy conditions, the endpointer should be started and run in the
+// EnvironmentEstimation mode, for at least 200ms, before switching to
+// UserInputMode.
+// Audio feedback contamination can appear in the input audio, if not cut
+// out or handled by echo cancellation. Audio feedback can trigger a false
+// accept. The false accepts can be ignored by setting
+// ep_contamination_rejection_period.
+
+#ifndef COBALT_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_H_
+#define COBALT_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_H_
+
+#include <stdint.h>
+
+#include <memory>
+#include <vector>
+
+#include "base/memory/scoped_ptr.h"
+#include "cobalt/speech/endpointer/energy_endpointer_params.h"
+
+namespace cobalt {
+namespace speech {
+
+// Endpointer status codes
+enum EpStatus {
+ EP_PRE_SPEECH = 10,
+ EP_POSSIBLE_ONSET,
+ EP_SPEECH_PRESENT,
+ EP_POSSIBLE_OFFSET,
+ EP_POST_SPEECH,
+};
+
+class EnergyEndpointer {
+ public:
+ // The default construction MUST be followed by Init(), before any
+ // other use can be made of the instance.
+ EnergyEndpointer();
+ virtual ~EnergyEndpointer();
+
+ void Init(const EnergyEndpointerParams& params);
+
+ // Start the endpointer. This should be called at the beginning of a session.
+ void StartSession();
+
+ // Stop the endpointer.
+ void EndSession();
+
+ // Start environment estimation. Audio will be used for environment estimation
+ // i.e. noise level estimation.
+ void SetEnvironmentEstimationMode();
+
+ // Start user input. This should be called when the user indicates start of
+ // input, e.g. by pressing a button.
+ void SetUserInputMode();
+
+ // Computes the next input frame and modifies EnergyEndpointer status as
+ // appropriate based on the computation.
+ void ProcessAudioFrame(int64_t time_us,
+ const int16_t* samples,
+ int num_samples,
+ float* rms_out);
+
+ // Returns the current state of the EnergyEndpointer and the time
+ // corresponding to the most recently computed frame.
+ EpStatus Status(int64_t* status_time_us) const;
+
+ bool estimating_environment() const {
+ return estimating_environment_;
+ }
+
+ // Returns estimated noise level in dB.
+ float GetNoiseLevelDb() const;
+
+ private:
+ class HistoryRing;
+
+ // Resets the endpointer internal state. If reset_threshold is true, the
+ // state will be reset completely, including adaptive thresholds and the
+ // removal of all history information.
+ void Restart(bool reset_threshold);
+
+ // Update internal speech and noise levels.
+ void UpdateLevels(float rms);
+
+ // Returns the number of frames (or frame number) corresponding to
+ // the 'time' (in seconds).
+ int TimeToFrame(float time) const;
+
+ EpStatus status_; // The current state of this instance.
+ float offset_confirm_dur_sec_; // max on time allowed to confirm POST_SPEECH
+ int64_t
+ endpointer_time_us_; // Time of the most recently received audio frame.
+ int64_t
+ fast_update_frames_; // Number of frames for initial level adaptation.
+ int64_t
+ frame_counter_; // Number of frames seen. Used for initial adaptation.
+ float max_window_dur_; // Largest search window size (seconds)
+ float sample_rate_; // Sampling rate.
+
+ // Ring buffers to hold the speech activity history.
+ scoped_ptr<HistoryRing> history_;
+
+ // Configuration parameters.
+ EnergyEndpointerParams params_;
+
+ // RMS which must be exceeded to conclude frame is speech.
+ float decision_threshold_;
+
+ // Flag to indicate that audio should be used to estimate environment, prior
+ // to receiving user input.
+ bool estimating_environment_;
+
+ // Estimate of the background noise level. Used externally for UI feedback.
+ float noise_level_;
+
+ // An adaptive threshold used to update decision_threshold_ when appropriate.
+ float rms_adapt_;
+
+ // Start lag corresponds to the highest fundamental frequency.
+ int start_lag_;
+
+ // End lag corresponds to the lowest fundamental frequency.
+ int end_lag_;
+
+ // Time when mode switched from environment estimation to user input. This
+ // is used to time forced rejection of audio feedback contamination.
+ int64_t user_input_start_time_us_;
+
+ DISALLOW_COPY_AND_ASSIGN(EnergyEndpointer);
+};
+
+} // namespace speech
+} // namespace cobalt
+
+#endif // COBALT_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_H_
diff --git a/src/cobalt/speech/endpointer/energy_endpointer_params.cc b/src/cobalt/speech/endpointer/energy_endpointer_params.cc
new file mode 100644
index 0000000..c71e11c
--- /dev/null
+++ b/src/cobalt/speech/endpointer/energy_endpointer_params.cc
@@ -0,0 +1,55 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "cobalt/speech/endpointer/energy_endpointer_params.h"
+
+namespace cobalt {
+namespace speech {
+
+EnergyEndpointerParams::EnergyEndpointerParams() {
+ SetDefaults();
+}
+
+void EnergyEndpointerParams::SetDefaults() {
+ frame_period_ = 0.01f;
+ frame_duration_ = 0.01f;
+ endpoint_margin_ = 0.2f;
+ onset_window_ = 0.15f;
+ speech_on_window_ = 0.4f;
+ offset_window_ = 0.15f;
+ onset_detect_dur_ = 0.09f;
+ onset_confirm_dur_ = 0.075f;
+ on_maintain_dur_ = 0.10f;
+ offset_confirm_dur_ = 0.12f;
+ decision_threshold_ = 150.0f;
+ min_decision_threshold_ = 50.0f;
+ fast_update_dur_ = 0.2f;
+ sample_rate_ = 8000.0f;
+ min_fundamental_frequency_ = 57.143f;
+ max_fundamental_frequency_ = 400.0f;
+ contamination_rejection_period_ = 0.25f;
+}
+
+void EnergyEndpointerParams::operator=(const EnergyEndpointerParams& source) {
+ frame_period_ = source.frame_period();
+ frame_duration_ = source.frame_duration();
+ endpoint_margin_ = source.endpoint_margin();
+ onset_window_ = source.onset_window();
+ speech_on_window_ = source.speech_on_window();
+ offset_window_ = source.offset_window();
+ onset_detect_dur_ = source.onset_detect_dur();
+ onset_confirm_dur_ = source.onset_confirm_dur();
+ on_maintain_dur_ = source.on_maintain_dur();
+ offset_confirm_dur_ = source.offset_confirm_dur();
+ decision_threshold_ = source.decision_threshold();
+ min_decision_threshold_ = source.min_decision_threshold();
+ fast_update_dur_ = source.fast_update_dur();
+ sample_rate_ = source.sample_rate();
+ min_fundamental_frequency_ = source.min_fundamental_frequency();
+ max_fundamental_frequency_ = source.max_fundamental_frequency();
+ contamination_rejection_period_ = source.contamination_rejection_period();
+}
+
+} // namespace speech
+} // namespace cobalt
diff --git a/src/cobalt/speech/endpointer/energy_endpointer_params.h b/src/cobalt/speech/endpointer/energy_endpointer_params.h
new file mode 100644
index 0000000..06810d0
--- /dev/null
+++ b/src/cobalt/speech/endpointer/energy_endpointer_params.h
@@ -0,0 +1,137 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COBALT_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_PARAMS_H_
+#define COBALT_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_PARAMS_H_
+
+namespace cobalt {
+namespace speech {
+
+// Input parameters for the EnergyEndpointer class.
+class EnergyEndpointerParams {
+ public:
+ EnergyEndpointerParams();
+
+ void SetDefaults();
+
+ void operator=(const EnergyEndpointerParams& source);
+
+ // Accessors and mutators
+ float frame_period() const { return frame_period_; }
+ void set_frame_period(float frame_period) {
+ frame_period_ = frame_period;
+ }
+
+ float frame_duration() const { return frame_duration_; }
+ void set_frame_duration(float frame_duration) {
+ frame_duration_ = frame_duration;
+ }
+
+ float endpoint_margin() const { return endpoint_margin_; }
+ void set_endpoint_margin(float endpoint_margin) {
+ endpoint_margin_ = endpoint_margin;
+ }
+
+ float onset_window() const { return onset_window_; }
+ void set_onset_window(float onset_window) { onset_window_ = onset_window; }
+
+ float speech_on_window() const { return speech_on_window_; }
+ void set_speech_on_window(float speech_on_window) {
+ speech_on_window_ = speech_on_window;
+ }
+
+ float offset_window() const { return offset_window_; }
+ void set_offset_window(float offset_window) {
+ offset_window_ = offset_window;
+ }
+
+ float onset_detect_dur() const { return onset_detect_dur_; }
+ void set_onset_detect_dur(float onset_detect_dur) {
+ onset_detect_dur_ = onset_detect_dur;
+ }
+
+ float onset_confirm_dur() const { return onset_confirm_dur_; }
+ void set_onset_confirm_dur(float onset_confirm_dur) {
+ onset_confirm_dur_ = onset_confirm_dur;
+ }
+
+ float on_maintain_dur() const { return on_maintain_dur_; }
+ void set_on_maintain_dur(float on_maintain_dur) {
+ on_maintain_dur_ = on_maintain_dur;
+ }
+
+ float offset_confirm_dur() const { return offset_confirm_dur_; }
+ void set_offset_confirm_dur(float offset_confirm_dur) {
+ offset_confirm_dur_ = offset_confirm_dur;
+ }
+
+ float decision_threshold() const { return decision_threshold_; }
+ void set_decision_threshold(float decision_threshold) {
+ decision_threshold_ = decision_threshold;
+ }
+
+ float min_decision_threshold() const { return min_decision_threshold_; }
+ void set_min_decision_threshold(float min_decision_threshold) {
+ min_decision_threshold_ = min_decision_threshold;
+ }
+
+ float fast_update_dur() const { return fast_update_dur_; }
+ void set_fast_update_dur(float fast_update_dur) {
+ fast_update_dur_ = fast_update_dur;
+ }
+
+ float sample_rate() const { return sample_rate_; }
+ void set_sample_rate(float sample_rate) { sample_rate_ = sample_rate; }
+
+ float min_fundamental_frequency() const { return min_fundamental_frequency_; }
+ void set_min_fundamental_frequency(float min_fundamental_frequency) {
+ min_fundamental_frequency_ = min_fundamental_frequency;
+ }
+
+ float max_fundamental_frequency() const { return max_fundamental_frequency_; }
+ void set_max_fundamental_frequency(float max_fundamental_frequency) {
+ max_fundamental_frequency_ = max_fundamental_frequency;
+ }
+
+ float contamination_rejection_period() const {
+ return contamination_rejection_period_;
+ }
+ void set_contamination_rejection_period(
+ float contamination_rejection_period) {
+ contamination_rejection_period_ = contamination_rejection_period;
+ }
+
+ private:
+ float frame_period_; // Frame period
+ float frame_duration_; // Window size
+ float onset_window_; // Interval scanned for onset activity
+ float speech_on_window_; // Inverval scanned for ongoing speech
+ float offset_window_; // Interval scanned for offset evidence
+ float offset_confirm_dur_; // Silence duration required to confirm offset
+ float decision_threshold_; // Initial rms detection threshold
+ float min_decision_threshold_; // Minimum rms detection threshold
+ float fast_update_dur_; // Period for initial estimation of levels.
+ float sample_rate_; // Expected sample rate.
+
+ // Time to add on either side of endpoint threshold crossings
+ float endpoint_margin_;
+ // Total dur within onset_window required to enter ONSET state
+ float onset_detect_dur_;
+ // Total on time within onset_window required to enter SPEECH_ON state
+ float onset_confirm_dur_;
+ // Minimum dur in SPEECH_ON state required to maintain ON state
+ float on_maintain_dur_;
+ // Minimum fundamental frequency for autocorrelation.
+ float min_fundamental_frequency_;
+ // Maximum fundamental frequency for autocorrelation.
+ float max_fundamental_frequency_;
+ // Period after start of user input that above threshold values are ignored.
+ // This is to reject audio feedback contamination.
+ float contamination_rejection_period_;
+};
+
+} // namespace speech
+} // namespace cobalt
+
+#endif // COBALT_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_PARAMS_H_
diff --git a/src/cobalt/speech/speech.gyp b/src/cobalt/speech/speech.gyp
index 90ba3b1..eb0ffcb 100644
--- a/src/cobalt/speech/speech.gyp
+++ b/src/cobalt/speech/speech.gyp
@@ -30,6 +30,14 @@
'audio_encoder_flac.h',
'chunked_byte_buffer.cc',
'chunked_byte_buffer.h',
+
+ 'endpointer/endpointer.cc',
+ 'endpointer/endpointer.h',
+ 'endpointer/energy_endpointer.cc',
+ 'endpointer/energy_endpointer.h',
+ 'endpointer/energy_endpointer_params.cc',
+ 'endpointer/energy_endpointer_params.h',
+
'google_streaming_api.pb.cc',
'google_streaming_api.pb.h',
'google_streaming_api.pb.proto',
@@ -80,6 +88,7 @@
'type': '<(gtest_target_type)',
'sources': [
'chunked_byte_buffer_unittest.cc',
+ 'endpointer/endpointer_unittest.cc',
],
'dependencies': [
'<(DEPTH)/base/base.gyp:run_all_unittests',
diff --git a/src/cobalt/webdriver_benchmarks/partial_layout_benchmark.py b/src/cobalt/webdriver_benchmarks/partial_layout_benchmark.py
index 479ad5c..fa2642d 100755
--- a/src/cobalt/webdriver_benchmarks/partial_layout_benchmark.py
+++ b/src/cobalt/webdriver_benchmarks/partial_layout_benchmark.py
@@ -10,6 +10,7 @@
import argparse
import importlib
+import inspect
import os
import re
import socket
@@ -20,21 +21,25 @@
arg_parser = argparse.ArgumentParser(
description="Runs Webdriver-based Cobalt benchmarks")
arg_parser.add_argument(
- "-p", "--platform",
+ "-p",
+ "--platform",
help="Cobalt platform, eg 'linux-x64x11'."
"Fetched from environment if absent.")
arg_parser.add_argument(
- "-e", "--executable",
+ "-e",
+ "--executable",
help="Path to cobalt executable. "
"Auto-derived if absent.")
arg_parser.add_argument(
- "-c", "--config",
+ "-c",
+ "--config",
choices=["debug", "devel", "qa", "gold"],
help="Build config (eg, 'qa' or 'devel'). Not used if "
"--executable is specified. Fetched from environment "
"if needed and absent.")
arg_parser.add_argument(
- "-d", "--devkit_name",
+ "-d",
+ "--devkit_name",
help="Devkit or IP address for app_launcher."
"Current hostname used if absent.")
arg_parser.add_argument(
@@ -104,8 +109,11 @@
def __init__(self, platform, executable, devkit_name, log_file_path):
self.selenium_webdriver_module = ImportSeleniumModule("webdriver")
- script_path = os.path.dirname(__file__)
- sys.path.append(script_path + "/../../tools/lbshell/")
+ script_path = os.path.realpath(inspect.getsourcefile(lambda: 0))
+ app_launcher_path = os.path.realpath(
+ os.path.join(
+ os.path.dirname(script_path), "..", "..", "tools", "lbshell"))
+ sys.path.append(app_launcher_path)
app_launcher = importlib.import_module("app_launcher")
self.launcher = app_launcher.CreateLauncher(
platform, executable, devkit_name=devkit_name, close_output_file=False)
@@ -190,7 +198,8 @@
except KeyError:
sys.stderr.write("Must specify --config or --executable\n")
sys.exit(1)
- script_dir = os.path.dirname(os.path.realpath(__file__))
+ script_path = os.path.realpath(inspect.getsourcefile(lambda: 0))
+ script_dir = os.path.dirname(script_path)
out_dir = os.path.join(script_dir, "..", "..", "out")
executable_directory = os.path.join(out_dir, "{}_{}".format(platform, config))
return os.path.join(executable_directory, "cobalt")
diff --git a/src/nb/nb.gyp b/src/nb/nb.gyp
index 6f56df7..b3b9a0d 100644
--- a/src/nb/nb.gyp
+++ b/src/nb/nb.gyp
@@ -79,6 +79,7 @@
'variables': {
'executable_name': 'nb_test',
},
+ 'includes': [ '../starboard/build/deploy.gypi' ],
},
{
diff --git a/src/starboard/shared/starboard/application.cc b/src/starboard/shared/starboard/application.cc
index 4498156..1adc590 100644
--- a/src/starboard/shared/starboard/application.cc
+++ b/src/starboard/shared/starboard/application.cc
@@ -17,7 +17,6 @@
#include "starboard/atomic.h"
#include "starboard/condition_variable.h"
#include "starboard/event.h"
-#include "starboard/log.h"
#include "starboard/memory.h"
#include "starboard/string.h"