Import Cobalt 6.14197

commit: 878ef3e23600a250d4cfb86f37d447f3ed666ca9 [log] [tgz]
author: David Ghandehari <iffy@google.com> Sun Nov 13 21:06:36 2016 -0800
committer: David Ghandehari <iffy@google.com> Sun Nov 13 21:06:36 2016 -0800
tree: e32d35b879b957dcb8bf49e8aff95e9e7090b314
parent: 07c204f02a8437b2b0d3a3536a986b4ee0f3c2e1 [diff]
diff --git a/src/content/browser/speech/chunked_byte_buffer.cc b/src/content/browser/speech/chunked_byte_buffer.cc
new file mode 100644
index 0000000..a43e40a
--- /dev/null
+++ b/src/content/browser/speech/chunked_byte_buffer.cc

@@ -0,0 +1,134 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "content/browser/speech/chunked_byte_buffer.h"
+
+#include <algorithm>
+#include <utility>
+
+#include "base/lazy_instance.h"
+#include "base/logging.h"
+
+namespace {
+
+static const size_t kHeaderLength = sizeof(uint32_t);
+
+static_assert(sizeof(size_t) >= kHeaderLength,
+              "chunked byte buffer not supported on this architecture");
+
+uint32_t ReadBigEndian32(const uint8_t* buffer) {
+  return (static_cast<uint32_t>(buffer[3])) |
+         (static_cast<uint32_t>(buffer[2]) << 8) |
+         (static_cast<uint32_t>(buffer[1]) << 16) |
+         (static_cast<uint32_t>(buffer[0]) << 24);
+}
+
+}  // namespace
+
+namespace content {
+
+ChunkedByteBuffer::ChunkedByteBuffer()
+    : partial_chunk_(new Chunk()),
+      total_bytes_stored_(0) {
+}
+
+ChunkedByteBuffer::~ChunkedByteBuffer() {
+  Clear();
+}
+
+void ChunkedByteBuffer::Append(const uint8_t* start, size_t length) {
+  size_t remaining_bytes = length;
+  const uint8_t* next_data = start;
+
+  while (remaining_bytes > 0) {
+    DCHECK(partial_chunk_ != NULL);
+    size_t insert_length = 0;
+    bool header_completed = false;
+    bool content_completed = false;
+    std::vector<uint8_t>* insert_target;
+
+    if (partial_chunk_->header.size() < kHeaderLength) {
+      const size_t bytes_to_complete_header =
+          kHeaderLength - partial_chunk_->header.size();
+      insert_length = std::min(bytes_to_complete_header, remaining_bytes);
+      insert_target = &partial_chunk_->header;
+      header_completed = (remaining_bytes >= bytes_to_complete_header);
+    } else {
+      DCHECK_LT(partial_chunk_->content->size(),
+                partial_chunk_->ExpectedContentLength());
+      const size_t bytes_to_complete_chunk =
+          partial_chunk_->ExpectedContentLength() -
+          partial_chunk_->content->size();
+      insert_length = std::min(bytes_to_complete_chunk, remaining_bytes);
+      insert_target = partial_chunk_->content.get();
+      content_completed = (remaining_bytes >= bytes_to_complete_chunk);
+    }
+
+    DCHECK_GT(insert_length, 0U);
+    DCHECK_LE(insert_length, remaining_bytes);
+    DCHECK_LE(next_data + insert_length, start + length);
+    insert_target->insert(insert_target->end(),
+                          next_data,
+                          next_data + insert_length);
+    next_data += insert_length;
+    remaining_bytes -= insert_length;
+
+    if (header_completed) {
+      DCHECK_EQ(partial_chunk_->header.size(), kHeaderLength);
+      if (partial_chunk_->ExpectedContentLength() == 0) {
+        // Handle zero-byte chunks.
+        chunks_.push_back(partial_chunk_.release());
+        partial_chunk_.reset(new Chunk());
+      } else {
+        partial_chunk_->content->reserve(
+            partial_chunk_->ExpectedContentLength());
+      }
+    } else if (content_completed) {
+      DCHECK_EQ(partial_chunk_->content->size(),
+                partial_chunk_->ExpectedContentLength());
+      chunks_.push_back(partial_chunk_.release());
+      partial_chunk_.reset(new Chunk());
+    }
+  }
+  DCHECK_EQ(next_data, start + length);
+  total_bytes_stored_ += length;
+}
+
+void ChunkedByteBuffer::Append(const std::string& string) {
+  Append(reinterpret_cast<const uint8_t*>(string.data()), string.size());
+}
+
+bool ChunkedByteBuffer::HasChunks() const {
+  return !chunks_.empty();
+}
+
+std::unique_ptr<std::vector<uint8_t>> ChunkedByteBuffer::PopChunk() {
+  if (chunks_.empty())
+    return std::unique_ptr<std::vector<uint8_t>>();
+  std::unique_ptr<Chunk> chunk(*chunks_.begin());
+  chunks_.weak_erase(chunks_.begin());
+  DCHECK_EQ(chunk->header.size(), kHeaderLength);
+  DCHECK_EQ(chunk->content->size(), chunk->ExpectedContentLength());
+  total_bytes_stored_ -= chunk->content->size();
+  total_bytes_stored_ -= kHeaderLength;
+  return std::move(chunk->content);
+}
+
+void ChunkedByteBuffer::Clear() {
+  chunks_.clear();
+  partial_chunk_.reset(new Chunk());
+  total_bytes_stored_ = 0;
+}
+
+ChunkedByteBuffer::Chunk::Chunk() : content(new std::vector<uint8_t>()) {}
+
+ChunkedByteBuffer::Chunk::~Chunk() {
+}
+
+size_t ChunkedByteBuffer::Chunk::ExpectedContentLength() const {
+  DCHECK_EQ(header.size(), kHeaderLength);
+  return static_cast<size_t>(ReadBigEndian32(&header[0]));
+}
+
+}  // namespace content

diff --git a/src/content/browser/speech/chunked_byte_buffer.h b/src/content/browser/speech/chunked_byte_buffer.h
new file mode 100644
index 0000000..41e273d
--- /dev/null
+++ b/src/content/browser/speech/chunked_byte_buffer.h

@@ -0,0 +1,78 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CONTENT_BROWSER_SPEECH_CHUNKED_BYTE_BUFFER_H_
+#define CONTENT_BROWSER_SPEECH_CHUNKED_BYTE_BUFFER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "base/macros.h"
+#include "base/memory/scoped_vector.h"
+#include "content/common/content_export.h"
+
+namespace content {
+
+// Models a chunk-oriented byte buffer. The term chunk is herein defined as an
+// arbitrary sequence of bytes that is preceeded by N header bytes, indicating
+// its size. Data may be appended to the buffer with no particular respect of
+// chunks boundaries. However, chunks can be extracted (FIFO) only when their
+// content (according to their header) is fully available in the buffer.
+// The current implementation support only 4 byte Big Endian headers.
+// Empty chunks (i.e. the sequence 00 00 00 00) are NOT allowed.
+//
+// E.g. 00 00 00 04 xx xx xx xx 00 00 00 02 yy yy 00 00 00 04 zz zz zz zz
+//      [----- CHUNK 1 -------] [--- CHUNK 2 ---] [------ CHUNK 3 ------]
+class CONTENT_EXPORT ChunkedByteBuffer {
+ public:
+  ChunkedByteBuffer();
+  ~ChunkedByteBuffer();
+
+  // Appends |length| bytes starting from |start| to the buffer.
+  void Append(const uint8_t* start, size_t length);
+
+  // Appends bytes contained in the |string| to the buffer.
+  void Append(const std::string& string);
+
+  // Checks whether one or more complete chunks are available in the buffer.
+  bool HasChunks() const;
+
+  // If enough data is available, reads and removes the first complete chunk
+  // from the buffer. Returns a NULL pointer if no complete chunk is available.
+  std::unique_ptr<std::vector<uint8_t>> PopChunk();
+
+  // Clears all the content of the buffer.
+  void Clear();
+
+  // Returns the number of raw bytes (including headers) present.
+  size_t GetTotalLength() const { return total_bytes_stored_; }
+
+ private:
+  struct Chunk {
+    Chunk();
+    ~Chunk();
+
+    std::vector<uint8_t> header;
+    std::unique_ptr<std::vector<uint8_t>> content;
+    size_t ExpectedContentLength() const;
+
+   private:
+    DISALLOW_COPY_AND_ASSIGN(Chunk);
+  };
+
+  ScopedVector<Chunk> chunks_;
+  std::unique_ptr<Chunk> partial_chunk_;
+  size_t total_bytes_stored_;
+
+  DISALLOW_COPY_AND_ASSIGN(ChunkedByteBuffer);
+};
+
+
+}  // namespace content
+
+#endif  // CONTENT_BROWSER_SPEECH_CHUNKED_BYTE_BUFFER_H_

diff --git a/src/content/browser/speech/chunked_byte_buffer_unittest.cc b/src/content/browser/speech/chunked_byte_buffer_unittest.cc
new file mode 100644
index 0000000..d8a5cb2
--- /dev/null
+++ b/src/content/browser/speech/chunked_byte_buffer_unittest.cc

@@ -0,0 +1,76 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <stdint.h>
+
+#include <string>
+#include <vector>
+
+#include "content/browser/speech/chunked_byte_buffer.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace content {
+
+typedef std::vector<uint8_t> ByteVector;
+
+TEST(ChunkedByteBufferTest, BasicTest) {
+  ChunkedByteBuffer buffer;
+
+  const uint8_t kChunks[] = {
+      0x00, 0x00, 0x00, 0x04, 0x01, 0x02, 0x03, 0x04,  // Chunk 1: 4 bytes
+      0x00, 0x00, 0x00, 0x02, 0x05, 0x06,              // Chunk 2: 2 bytes
+      0x00, 0x00, 0x00, 0x01, 0x07                     // Chunk 3: 1 bytes
+  };
+
+  EXPECT_EQ(0U, buffer.GetTotalLength());
+  EXPECT_FALSE(buffer.HasChunks());
+
+  // Append partially chunk 1.
+  buffer.Append(kChunks, 2);
+  EXPECT_EQ(2U, buffer.GetTotalLength());
+  EXPECT_FALSE(buffer.HasChunks());
+
+  // Complete chunk 1.
+  buffer.Append(kChunks + 2, 6);
+  EXPECT_EQ(8U, buffer.GetTotalLength());
+  EXPECT_TRUE(buffer.HasChunks());
+
+  // Append fully chunk 2.
+  buffer.Append(kChunks + 8, 6);
+  EXPECT_EQ(14U, buffer.GetTotalLength());
+  EXPECT_TRUE(buffer.HasChunks());
+
+  // Remove and check chunk 1.
+  std::unique_ptr<ByteVector> chunk;
+  chunk = buffer.PopChunk();
+  EXPECT_TRUE(chunk != NULL);
+  EXPECT_EQ(4U, chunk->size());
+  EXPECT_EQ(0, std::char_traits<uint8_t>::compare(kChunks + 4, &(*chunk)[0],
+                                                  chunk->size()));
+  EXPECT_EQ(6U, buffer.GetTotalLength());
+  EXPECT_TRUE(buffer.HasChunks());
+
+  // Read and check chunk 2.
+  chunk = buffer.PopChunk();
+  EXPECT_TRUE(chunk != NULL);
+  EXPECT_EQ(2U, chunk->size());
+  EXPECT_EQ(0, std::char_traits<uint8_t>::compare(kChunks + 12, &(*chunk)[0],
+                                                  chunk->size()));
+  EXPECT_EQ(0U, buffer.GetTotalLength());
+  EXPECT_FALSE(buffer.HasChunks());
+
+  // Append fully chunk 3.
+  buffer.Append(kChunks + 14, 5);
+  EXPECT_EQ(5U, buffer.GetTotalLength());
+
+  // Remove and check chunk 3.
+  chunk = buffer.PopChunk();
+  EXPECT_TRUE(chunk != NULL);
+  EXPECT_EQ(1U, chunk->size());
+  EXPECT_EQ((*chunk)[0], kChunks[18]);
+  EXPECT_EQ(0U, buffer.GetTotalLength());
+  EXPECT_FALSE(buffer.HasChunks());
+}
+
+}  // namespace content

diff --git a/src/content/browser/speech/endpointer/endpointer.cc b/src/content/browser/speech/endpointer/endpointer.cc
new file mode 100644
index 0000000..1758970
--- /dev/null
+++ b/src/content/browser/speech/endpointer/endpointer.cc

@@ -0,0 +1,169 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "content/browser/speech/endpointer/endpointer.h"
+
+#include "base/time/time.h"
+#include "content/browser/speech/audio_buffer.h"
+
+using base::Time;
+
+namespace {
+const int kFrameRate = 50;  // 1 frame = 20ms of audio.
+}
+
+namespace content {
+
+Endpointer::Endpointer(int sample_rate)
+    : speech_input_possibly_complete_silence_length_us_(-1),
+      speech_input_complete_silence_length_us_(-1),
+      audio_frame_time_us_(0),
+      sample_rate_(sample_rate),
+      frame_size_(0) {
+  Reset();
+
+  frame_size_ = static_cast<int>(sample_rate / static_cast<float>(kFrameRate));
+
+  speech_input_minimum_length_us_ =
+      static_cast<int64_t>(1.7 * Time::kMicrosecondsPerSecond);
+  speech_input_complete_silence_length_us_ =
+      static_cast<int64_t>(0.5 * Time::kMicrosecondsPerSecond);
+  long_speech_input_complete_silence_length_us_ = -1;
+  long_speech_length_us_ = -1;
+  speech_input_possibly_complete_silence_length_us_ =
+      1 * Time::kMicrosecondsPerSecond;
+
+  // Set the default configuration for Push To Talk mode.
+  EnergyEndpointerParams ep_config;
+  ep_config.set_frame_period(1.0f / static_cast<float>(kFrameRate));
+  ep_config.set_frame_duration(1.0f / static_cast<float>(kFrameRate));
+  ep_config.set_endpoint_margin(0.2f);
+  ep_config.set_onset_window(0.15f);
+  ep_config.set_speech_on_window(0.4f);
+  ep_config.set_offset_window(0.15f);
+  ep_config.set_onset_detect_dur(0.09f);
+  ep_config.set_onset_confirm_dur(0.075f);
+  ep_config.set_on_maintain_dur(0.10f);
+  ep_config.set_offset_confirm_dur(0.12f);
+  ep_config.set_decision_threshold(1000.0f);
+  ep_config.set_min_decision_threshold(50.0f);
+  ep_config.set_fast_update_dur(0.2f);
+  ep_config.set_sample_rate(static_cast<float>(sample_rate));
+  ep_config.set_min_fundamental_frequency(57.143f);
+  ep_config.set_max_fundamental_frequency(400.0f);
+  ep_config.set_contamination_rejection_period(0.25f);
+  energy_endpointer_.Init(ep_config);
+}
+
+void Endpointer::Reset() {
+  old_ep_status_ = EP_PRE_SPEECH;
+  waiting_for_speech_possibly_complete_timeout_ = false;
+  waiting_for_speech_complete_timeout_ = false;
+  speech_previously_detected_ = false;
+  speech_input_complete_ = false;
+  audio_frame_time_us_ = 0; // Reset time for packets sent to endpointer.
+  speech_end_time_us_ = -1;
+  speech_start_time_us_ = -1;
+}
+
+void Endpointer::StartSession() {
+  Reset();
+  energy_endpointer_.StartSession();
+}
+
+void Endpointer::EndSession() {
+  energy_endpointer_.EndSession();
+}
+
+void Endpointer::SetEnvironmentEstimationMode() {
+  Reset();
+  energy_endpointer_.SetEnvironmentEstimationMode();
+}
+
+void Endpointer::SetUserInputMode() {
+  energy_endpointer_.SetUserInputMode();
+}
+
+EpStatus Endpointer::Status(int64_t* time) {
+  return energy_endpointer_.Status(time);
+}
+
+EpStatus Endpointer::ProcessAudio(const AudioChunk& raw_audio, float* rms_out) {
+  const int16_t* audio_data = raw_audio.SamplesData16();
+  const int num_samples = raw_audio.NumSamples();
+  EpStatus ep_status = EP_PRE_SPEECH;
+
+  // Process the input data in blocks of frame_size_, dropping any incomplete
+  // frames at the end (which is ok since typically the caller will be recording
+  // audio in multiples of our frame size).
+  int sample_index = 0;
+  while (sample_index + frame_size_ <= num_samples) {
+    // Have the endpointer process the frame.
+    energy_endpointer_.ProcessAudioFrame(audio_frame_time_us_,
+                                         audio_data + sample_index,
+                                         frame_size_,
+                                         rms_out);
+    sample_index += frame_size_;
+    audio_frame_time_us_ += (frame_size_ * Time::kMicrosecondsPerSecond) /
+                         sample_rate_;
+
+    // Get the status of the endpointer.
+    int64_t ep_time;
+    ep_status = energy_endpointer_.Status(&ep_time);
+
+    // Handle state changes.
+    if ((EP_SPEECH_PRESENT == ep_status) &&
+        (EP_POSSIBLE_ONSET == old_ep_status_)) {
+      speech_end_time_us_ = -1;
+      waiting_for_speech_possibly_complete_timeout_ = false;
+      waiting_for_speech_complete_timeout_ = false;
+      // Trigger SpeechInputDidStart event on first detection.
+      if (false == speech_previously_detected_) {
+        speech_previously_detected_ = true;
+        speech_start_time_us_ = ep_time;
+      }
+    }
+    if ((EP_PRE_SPEECH == ep_status) &&
+        (EP_POSSIBLE_OFFSET == old_ep_status_)) {
+      speech_end_time_us_ = ep_time;
+      waiting_for_speech_possibly_complete_timeout_ = true;
+      waiting_for_speech_complete_timeout_ = true;
+    }
+    if (ep_time > speech_input_minimum_length_us_) {
+      // Speech possibly complete timeout.
+      if ((waiting_for_speech_possibly_complete_timeout_) &&
+          (ep_time - speech_end_time_us_ >
+              speech_input_possibly_complete_silence_length_us_)) {
+        waiting_for_speech_possibly_complete_timeout_ = false;
+      }
+      if (waiting_for_speech_complete_timeout_) {
+        // The length of the silence timeout period can be held constant, or it
+        // can be changed after a fixed amount of time from the beginning of
+        // speech.
+        bool has_stepped_silence =
+            (long_speech_length_us_ > 0) &&
+            (long_speech_input_complete_silence_length_us_ > 0);
+        int64_t requested_silence_length;
+        if (has_stepped_silence &&
+            (ep_time - speech_start_time_us_) > long_speech_length_us_) {
+          requested_silence_length =
+              long_speech_input_complete_silence_length_us_;
+        } else {
+          requested_silence_length =
+              speech_input_complete_silence_length_us_;
+        }
+
+        // Speech complete timeout.
+        if ((ep_time - speech_end_time_us_) > requested_silence_length) {
+          waiting_for_speech_complete_timeout_ = false;
+          speech_input_complete_ = true;
+        }
+      }
+    }
+    old_ep_status_ = ep_status;
+  }
+  return ep_status;
+}
+
+}  // namespace content

diff --git a/src/content/browser/speech/endpointer/endpointer.h b/src/content/browser/speech/endpointer/endpointer.h
new file mode 100644
index 0000000..5790672
--- /dev/null
+++ b/src/content/browser/speech/endpointer/endpointer.h

@@ -0,0 +1,154 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CONTENT_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_
+#define CONTENT_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_
+
+#include <stdint.h>
+
+#include "content/browser/speech/endpointer/energy_endpointer.h"
+#include "content/common/content_export.h"
+
+class EpStatus;
+
+namespace content {
+
+class AudioChunk;
+
+// A simple interface to the underlying energy-endpointer implementation, this
+// class lets callers provide audio as being recorded and let them poll to find
+// when the user has stopped speaking.
+//
+// There are two events that may trigger the end of speech:
+//
+// speechInputPossiblyComplete event:
+//
+// Signals that silence/noise has  been detected for a *short* amount of
+// time after some speech has been detected. It can be used for low latency
+// UI feedback. To disable it, set it to a large amount.
+//
+// speechInputComplete event:
+//
+// This event is intended to signal end of input and to stop recording.
+// The amount of time to wait after speech is set by
+// speech_input_complete_silence_length_ and optionally two other
+// parameters (see below).
+// This time can be held constant, or can change as more speech is detected.
+// In the latter case, the time changes after a set amount of time from the
+// *beginning* of speech.  This is motivated by the expectation that there
+// will be two distinct types of inputs: short search queries and longer
+// dictation style input.
+//
+// Three parameters are used to define the piecewise constant timeout function.
+// The timeout length is speech_input_complete_silence_length until
+// long_speech_length, when it changes to
+// long_speech_input_complete_silence_length.
+class CONTENT_EXPORT Endpointer {
+ public:
+  explicit Endpointer(int sample_rate);
+
+  // Start the endpointer. This should be called at the beginning of a session.
+  void StartSession();
+
+  // Stop the endpointer.
+  void EndSession();
+
+  // Start environment estimation. Audio will be used for environment estimation
+  // i.e. noise level estimation.
+  void SetEnvironmentEstimationMode();
+
+  // Start user input. This should be called when the user indicates start of
+  // input, e.g. by pressing a button.
+  void SetUserInputMode();
+
+  // Process a segment of audio, which may be more than one frame.
+  // The status of the last frame will be returned.
+  EpStatus ProcessAudio(const AudioChunk& raw_audio, float* rms_out);
+
+  // Get the status of the endpointer.
+  EpStatus Status(int64_t* time_us);
+
+  // Returns true if the endpointer detected reasonable audio levels above
+  // background noise which could be user speech, false if not.
+  bool DidStartReceivingSpeech() const {
+    return speech_previously_detected_;
+  }
+
+  bool IsEstimatingEnvironment() const {
+    return energy_endpointer_.estimating_environment();
+  }
+
+  void set_speech_input_complete_silence_length(int64_t time_us) {
+    speech_input_complete_silence_length_us_ = time_us;
+  }
+
+  void set_long_speech_input_complete_silence_length(int64_t time_us) {
+    long_speech_input_complete_silence_length_us_ = time_us;
+  }
+
+  void set_speech_input_possibly_complete_silence_length(int64_t time_us) {
+    speech_input_possibly_complete_silence_length_us_ = time_us;
+  }
+
+  void set_long_speech_length(int64_t time_us) {
+    long_speech_length_us_ = time_us;
+  }
+
+  bool speech_input_complete() const {
+    return speech_input_complete_;
+  }
+
+  // RMS background noise level in dB.
+  float NoiseLevelDb() const { return energy_endpointer_.GetNoiseLevelDb(); }
+
+ private:
+  // Reset internal states. Helper method common to initial input utterance
+  // and following input utternaces.
+  void Reset();
+
+  // Minimum allowable length of speech input.
+  int64_t speech_input_minimum_length_us_;
+
+  // The speechInputPossiblyComplete event signals that silence/noise has been
+  // detected for a *short* amount of time after some speech has been detected.
+  // This proporty specifies the time period.
+  int64_t speech_input_possibly_complete_silence_length_us_;
+
+  // The speechInputComplete event signals that silence/noise has been
+  // detected for a *long* amount of time after some speech has been detected.
+  // This property specifies the time period.
+  int64_t speech_input_complete_silence_length_us_;
+
+  // Same as above, this specifies the required silence period after speech
+  // detection. This period is used instead of
+  // speech_input_complete_silence_length_ when the utterance is longer than
+  // long_speech_length_. This parameter is optional.
+  int64_t long_speech_input_complete_silence_length_us_;
+
+  // The period of time after which the endpointer should consider
+  // long_speech_input_complete_silence_length_ as a valid silence period
+  // instead of speech_input_complete_silence_length_. This parameter is
+  // optional.
+  int64_t long_speech_length_us_;
+
+  // First speech onset time, used in determination of speech complete timeout.
+  int64_t speech_start_time_us_;
+
+  // Most recent end time, used in determination of speech complete timeout.
+  int64_t speech_end_time_us_;
+
+  int64_t audio_frame_time_us_;
+  EpStatus old_ep_status_;
+  bool waiting_for_speech_possibly_complete_timeout_;
+  bool waiting_for_speech_complete_timeout_;
+  bool speech_previously_detected_;
+  bool speech_input_complete_;
+  EnergyEndpointer energy_endpointer_;
+  int sample_rate_;
+  int32_t frame_size_;
+};
+
+}  // namespace content
+
+#endif  // CONTENT_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_

diff --git a/src/content/browser/speech/endpointer/endpointer_unittest.cc b/src/content/browser/speech/endpointer/endpointer_unittest.cc
new file mode 100644
index 0000000..53ec4d1
--- /dev/null
+++ b/src/content/browser/speech/endpointer/endpointer_unittest.cc

@@ -0,0 +1,156 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <stdint.h>
+
+#include "content/browser/speech/audio_buffer.h"
+#include "content/browser/speech/endpointer/endpointer.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace {
+const int kFrameRate = 50;  // 20 ms long frames for AMR encoding.
+const int kSampleRate = 8000;  // 8 k samples per second for AMR encoding.
+
+// At 8 sample per second a 20 ms frame is 160 samples, which corrsponds
+// to the AMR codec.
+const int kFrameSize = kSampleRate / kFrameRate;  // 160 samples.
+static_assert(kFrameSize == 160, "invalid frame size");
+}
+
+namespace content {
+
+class FrameProcessor {
+ public:
+  // Process a single frame of test audio samples.
+  virtual EpStatus ProcessFrame(int64_t time,
+                                int16_t* samples,
+                                int frame_size) = 0;
+};
+
+void RunEndpointerEventsTest(FrameProcessor* processor) {
+  int16_t samples[kFrameSize];
+
+  // We will create a white noise signal of 150 frames. The frames from 50 to
+  // 100 will have more power, and the endpointer should fire on those frames.
+  const int kNumFrames = 150;
+
+  // Create a random sequence of samples.
+  srand(1);
+  float gain = 0.0;
+  int64_t time = 0;
+  for (int frame_count = 0; frame_count < kNumFrames; ++frame_count) {
+    // The frames from 50 to 100 will have more power, and the endpointer
+    // should detect those frames as speech.
+    if ((frame_count >= 50) && (frame_count < 100)) {
+      gain = 2000.0;
+    } else {
+      gain = 1.0;
+    }
+    // Create random samples.
+    for (int i = 0; i < kFrameSize; ++i) {
+      float randNum = static_cast<float>(rand() - (RAND_MAX / 2)) /
+          static_cast<float>(RAND_MAX);
+      samples[i] = static_cast<int16_t>(gain * randNum);
+    }
+
+    EpStatus ep_status = processor->ProcessFrame(time, samples, kFrameSize);
+    time += static_cast<int64_t>(kFrameSize * (1e6 / kSampleRate));
+
+    // Log the status.
+    if (20 == frame_count)
+      EXPECT_EQ(EP_PRE_SPEECH, ep_status);
+    if (70 == frame_count)
+      EXPECT_EQ(EP_SPEECH_PRESENT, ep_status);
+    if (120 == frame_count)
+      EXPECT_EQ(EP_PRE_SPEECH, ep_status);
+  }
+}
+
+// This test instantiates and initializes a stand alone endpointer module.
+// The test creates FrameData objects with random noise and send them
+// to the endointer module. The energy of the first 50 frames is low,
+// followed by 500 high energy frames, and another 50 low energy frames.
+// We test that the correct start and end frames were detected.
+class EnergyEndpointerFrameProcessor : public FrameProcessor {
+ public:
+  explicit EnergyEndpointerFrameProcessor(EnergyEndpointer* endpointer)
+      : endpointer_(endpointer) {}
+
+  EpStatus ProcessFrame(int64_t time,
+                        int16_t* samples,
+                        int frame_size) override {
+    endpointer_->ProcessAudioFrame(time, samples, kFrameSize, NULL);
+    int64_t ep_time;
+    return endpointer_->Status(&ep_time);
+  }
+
+ private:
+  EnergyEndpointer* endpointer_;
+};
+
+TEST(EndpointerTest, TestEnergyEndpointerEvents) {
+  // Initialize endpointer and configure it. We specify the parameters
+  // here for a 20ms window, and a 20ms step size, which corrsponds to
+  // the narrow band AMR codec.
+  EnergyEndpointerParams ep_config;
+  ep_config.set_frame_period(1.0f / static_cast<float>(kFrameRate));
+  ep_config.set_frame_duration(1.0f / static_cast<float>(kFrameRate));
+  ep_config.set_endpoint_margin(0.2f);
+  ep_config.set_onset_window(0.15f);
+  ep_config.set_speech_on_window(0.4f);
+  ep_config.set_offset_window(0.15f);
+  ep_config.set_onset_detect_dur(0.09f);
+  ep_config.set_onset_confirm_dur(0.075f);
+  ep_config.set_on_maintain_dur(0.10f);
+  ep_config.set_offset_confirm_dur(0.12f);
+  ep_config.set_decision_threshold(100.0f);
+  EnergyEndpointer endpointer;
+  endpointer.Init(ep_config);
+
+  endpointer.StartSession();
+
+  EnergyEndpointerFrameProcessor frame_processor(&endpointer);
+  RunEndpointerEventsTest(&frame_processor);
+
+  endpointer.EndSession();
+};
+
+// Test endpointer wrapper class.
+class EndpointerFrameProcessor : public FrameProcessor {
+ public:
+  explicit EndpointerFrameProcessor(Endpointer* endpointer)
+      : endpointer_(endpointer) {}
+
+  EpStatus ProcessFrame(int64_t time,
+                        int16_t* samples,
+                        int frame_size) override {
+    scoped_refptr<AudioChunk> frame(
+        new AudioChunk(reinterpret_cast<uint8_t*>(samples), kFrameSize * 2, 2));
+    endpointer_->ProcessAudio(*frame.get(), NULL);
+    int64_t ep_time;
+    return endpointer_->Status(&ep_time);
+  }
+
+ private:
+  Endpointer* endpointer_;
+};
+
+TEST(EndpointerTest, TestEmbeddedEndpointerEvents) {
+  const int kSampleRate = 8000;  // 8 k samples per second for AMR encoding.
+
+  Endpointer endpointer(kSampleRate);
+  const int64_t kMillisecondsPerMicrosecond = 1000;
+  const int64_t short_timeout = 300 * kMillisecondsPerMicrosecond;
+  endpointer.set_speech_input_possibly_complete_silence_length(short_timeout);
+  const int64_t long_timeout = 500 * kMillisecondsPerMicrosecond;
+  endpointer.set_speech_input_complete_silence_length(long_timeout);
+  endpointer.StartSession();
+
+  EndpointerFrameProcessor frame_processor(&endpointer);
+  RunEndpointerEventsTest(&frame_processor);
+
+  endpointer.EndSession();
+}
+
+}  // namespace content

diff --git a/src/content/browser/speech/endpointer/energy_endpointer.cc b/src/content/browser/speech/endpointer/energy_endpointer.cc
new file mode 100644
index 0000000..fc1d871
--- /dev/null
+++ b/src/content/browser/speech/endpointer/energy_endpointer.cc

@@ -0,0 +1,379 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+//
+// To know more about the algorithm used and the original code which this is
+// based of, see
+// https://wiki.corp.google.com/twiki/bin/view/Main/ChromeGoogleCodeXRef
+
+#include "content/browser/speech/endpointer/energy_endpointer.h"
+
+#include <math.h>
+#include <stddef.h>
+
+#include "base/logging.h"
+#include "base/macros.h"
+
+namespace {
+
+// Returns the RMS (quadratic mean) of the input signal.
+float RMS(const int16_t* samples, int num_samples) {
+  int64_t ssq_int64 = 0;
+  int64_t sum_int64 = 0;
+  for (int i = 0; i < num_samples; ++i) {
+    sum_int64 += samples[i];
+    ssq_int64 += samples[i] * samples[i];
+  }
+  // now convert to floats.
+  double sum = static_cast<double>(sum_int64);
+  sum /= num_samples;
+  double ssq = static_cast<double>(ssq_int64);
+  return static_cast<float>(sqrt((ssq / num_samples) - (sum * sum)));
+}
+
+int64_t Secs2Usecs(float seconds) {
+  return static_cast<int64_t>(0.5 + (1.0e6 * seconds));
+}
+
+float GetDecibel(float value) {
+  if (value > 1.0e-100)
+    return 20 * log10(value);
+  return -2000.0;
+}
+
+}  // namespace
+
+namespace content {
+
+// Stores threshold-crossing histories for making decisions about the speech
+// state.
+class EnergyEndpointer::HistoryRing {
+ public:
+  HistoryRing() : insertion_index_(0) {}
+
+  // Resets the ring to |size| elements each with state |initial_state|
+  void SetRing(int size, bool initial_state);
+
+  // Inserts a new entry into the ring and drops the oldest entry.
+  void Insert(int64_t time_us, bool decision);
+
+  // Returns the time in microseconds of the most recently added entry.
+  int64_t EndTime() const;
+
+  // Returns the sum of all intervals during which 'decision' is true within
+  // the time in seconds specified by 'duration'. The returned interval is
+  // in seconds.
+  float RingSum(float duration_sec);
+
+ private:
+  struct DecisionPoint {
+    int64_t time_us;
+    bool decision;
+  };
+
+  std::vector<DecisionPoint> decision_points_;
+  int insertion_index_;  // Index at which the next item gets added/inserted.
+
+  DISALLOW_COPY_AND_ASSIGN(HistoryRing);
+};
+
+void EnergyEndpointer::HistoryRing::SetRing(int size, bool initial_state) {
+  insertion_index_ = 0;
+  decision_points_.clear();
+  DecisionPoint init = { -1, initial_state };
+  decision_points_.resize(size, init);
+}
+
+void EnergyEndpointer::HistoryRing::Insert(int64_t time_us, bool decision) {
+  decision_points_[insertion_index_].time_us = time_us;
+  decision_points_[insertion_index_].decision = decision;
+  insertion_index_ = (insertion_index_ + 1) % decision_points_.size();
+}
+
+int64_t EnergyEndpointer::HistoryRing::EndTime() const {
+  int ind = insertion_index_ - 1;
+  if (ind < 0)
+    ind = decision_points_.size() - 1;
+  return decision_points_[ind].time_us;
+}
+
+float EnergyEndpointer::HistoryRing::RingSum(float duration_sec) {
+  if (decision_points_.empty())
+    return 0.0;
+
+  int64_t sum_us = 0;
+  int ind = insertion_index_ - 1;
+  if (ind < 0)
+    ind = decision_points_.size() - 1;
+  int64_t end_us = decision_points_[ind].time_us;
+  bool is_on = decision_points_[ind].decision;
+  int64_t start_us =
+      end_us - static_cast<int64_t>(0.5 + (1.0e6 * duration_sec));
+  if (start_us < 0)
+    start_us = 0;
+  size_t n_summed = 1;  // n points ==> (n-1) intervals
+  while ((decision_points_[ind].time_us > start_us) &&
+         (n_summed < decision_points_.size())) {
+    --ind;
+    if (ind < 0)
+      ind = decision_points_.size() - 1;
+    if (is_on)
+      sum_us += end_us - decision_points_[ind].time_us;
+    is_on = decision_points_[ind].decision;
+    end_us = decision_points_[ind].time_us;
+    n_summed++;
+  }
+
+  return 1.0e-6f * sum_us;  //  Returns total time that was super threshold.
+}
+
+EnergyEndpointer::EnergyEndpointer()
+    : status_(EP_PRE_SPEECH),
+      offset_confirm_dur_sec_(0),
+      endpointer_time_us_(0),
+      fast_update_frames_(0),
+      frame_counter_(0),
+      max_window_dur_(4.0),
+      sample_rate_(0),
+      history_(new HistoryRing()),
+      decision_threshold_(0),
+      estimating_environment_(false),
+      noise_level_(0),
+      rms_adapt_(0),
+      start_lag_(0),
+      end_lag_(0),
+      user_input_start_time_us_(0) {
+}
+
+EnergyEndpointer::~EnergyEndpointer() {
+}
+
+int EnergyEndpointer::TimeToFrame(float time) const {
+  return static_cast<int32_t>(0.5 + (time / params_.frame_period()));
+}
+
+void EnergyEndpointer::Restart(bool reset_threshold) {
+  status_ = EP_PRE_SPEECH;
+  user_input_start_time_us_ = 0;
+
+  if (reset_threshold) {
+    decision_threshold_ = params_.decision_threshold();
+    rms_adapt_ = decision_threshold_;
+    noise_level_ = params_.decision_threshold() / 2.0f;
+    frame_counter_ = 0;  // Used for rapid initial update of levels.
+  }
+
+  // Set up the memories to hold the history windows.
+  history_->SetRing(TimeToFrame(max_window_dur_), false);
+
+  // Flag that indicates that current input should be used for
+  // estimating the environment. The user has not yet started input
+  // by e.g. pressed the push-to-talk button. By default, this is
+  // false for backward compatibility.
+  estimating_environment_ = false;
+}
+
+void EnergyEndpointer::Init(const EnergyEndpointerParams& params) {
+  params_ = params;
+
+  // Find the longest history interval to be used, and make the ring
+  // large enough to accommodate that number of frames.  NOTE: This
+  // depends upon ep_frame_period being set correctly in the factory
+  // that did this instantiation.
+  max_window_dur_ = params_.onset_window();
+  if (params_.speech_on_window() > max_window_dur_)
+    max_window_dur_ = params_.speech_on_window();
+  if (params_.offset_window() > max_window_dur_)
+    max_window_dur_ = params_.offset_window();
+  Restart(true);
+
+  offset_confirm_dur_sec_ = params_.offset_window() -
+                            params_.offset_confirm_dur();
+  if (offset_confirm_dur_sec_ < 0.0)
+    offset_confirm_dur_sec_ = 0.0;
+
+  user_input_start_time_us_ = 0;
+
+  // Flag that indicates that  current input should be used for
+  // estimating the environment. The user has not yet started input
+  // by e.g. pressed the push-to-talk button. By default, this is
+  // false for backward compatibility.
+  estimating_environment_ = false;
+  // The initial value of the noise and speech levels is inconsequential.
+  // The level of the first frame will overwrite these values.
+  noise_level_ = params_.decision_threshold() / 2.0f;
+  fast_update_frames_ =
+      static_cast<int64_t>(params_.fast_update_dur() / params_.frame_period());
+
+  frame_counter_ = 0;  // Used for rapid initial update of levels.
+
+  sample_rate_ = params_.sample_rate();
+  start_lag_ = static_cast<int>(sample_rate_ /
+                                params_.max_fundamental_frequency());
+  end_lag_ = static_cast<int>(sample_rate_ /
+                              params_.min_fundamental_frequency());
+}
+
+void EnergyEndpointer::StartSession() {
+  Restart(true);
+}
+
+void EnergyEndpointer::EndSession() {
+  status_ = EP_POST_SPEECH;
+}
+
+void EnergyEndpointer::SetEnvironmentEstimationMode() {
+  Restart(true);
+  estimating_environment_ = true;
+}
+
+void EnergyEndpointer::SetUserInputMode() {
+  estimating_environment_ = false;
+  user_input_start_time_us_ = endpointer_time_us_;
+}
+
+void EnergyEndpointer::ProcessAudioFrame(int64_t time_us,
+                                         const int16_t* samples,
+                                         int num_samples,
+                                         float* rms_out) {
+  endpointer_time_us_ = time_us;
+  float rms = RMS(samples, num_samples);
+
+  // Check that this is user input audio vs. pre-input adaptation audio.
+  // Input audio starts when the user indicates start of input, by e.g.
+  // pressing push-to-talk. Audio received prior to that is used to update
+  // noise and speech level estimates.
+  if (!estimating_environment_) {
+    bool decision = false;
+    if ((endpointer_time_us_ - user_input_start_time_us_) <
+        Secs2Usecs(params_.contamination_rejection_period())) {
+      decision = false;
+      DVLOG(1) << "decision: forced to false, time: " << endpointer_time_us_;
+    } else {
+      decision = (rms > decision_threshold_);
+    }
+
+    history_->Insert(endpointer_time_us_, decision);
+
+    switch (status_) {
+      case EP_PRE_SPEECH:
+        if (history_->RingSum(params_.onset_window()) >
+            params_.onset_detect_dur()) {
+          status_ = EP_POSSIBLE_ONSET;
+        }
+        break;
+
+      case EP_POSSIBLE_ONSET: {
+        float tsum = history_->RingSum(params_.onset_window());
+        if (tsum > params_.onset_confirm_dur()) {
+          status_ = EP_SPEECH_PRESENT;
+        } else {  // If signal is not maintained, drop back to pre-speech.
+          if (tsum <= params_.onset_detect_dur())
+            status_ = EP_PRE_SPEECH;
+        }
+        break;
+      }
+
+      case EP_SPEECH_PRESENT: {
+        // To induce hysteresis in the state residency, we allow a
+        // smaller residency time in the on_ring, than was required to
+        // enter the SPEECH_PERSENT state.
+        float on_time = history_->RingSum(params_.speech_on_window());
+        if (on_time < params_.on_maintain_dur())
+          status_ = EP_POSSIBLE_OFFSET;
+        break;
+      }
+
+      case EP_POSSIBLE_OFFSET:
+        if (history_->RingSum(params_.offset_window()) <=
+            offset_confirm_dur_sec_) {
+          // Note that this offset time may be beyond the end
+          // of the input buffer in a real-time system.  It will be up
+          // to the RecognizerSession to decide what to do.
+          status_ = EP_PRE_SPEECH;  // Automatically reset for next utterance.
+        } else {  // If speech picks up again we allow return to SPEECH_PRESENT.
+          if (history_->RingSum(params_.speech_on_window()) >=
+              params_.on_maintain_dur())
+            status_ = EP_SPEECH_PRESENT;
+        }
+        break;
+
+      default:
+        LOG(WARNING) << "Invalid case in switch: " << status_;
+        break;
+    }
+
+    // If this is a quiet, non-speech region, slowly adapt the detection
+    // threshold to be about 6dB above the average RMS.
+    if ((!decision) && (status_ == EP_PRE_SPEECH)) {
+      decision_threshold_ = (0.98f * decision_threshold_) + (0.02f * 2 * rms);
+      rms_adapt_ = decision_threshold_;
+    } else {
+      // If this is in a speech region, adapt the decision threshold to
+      // be about 10dB below the average RMS. If the noise level is high,
+      // the threshold is pushed up.
+      // Adaptation up to a higher level is 5 times faster than decay to
+      // a lower level.
+      if ((status_ == EP_SPEECH_PRESENT) && decision) {
+        if (rms_adapt_ > rms) {
+          rms_adapt_ = (0.99f * rms_adapt_) + (0.01f * rms);
+        } else {
+          rms_adapt_ = (0.95f * rms_adapt_) + (0.05f * rms);
+        }
+        float target_threshold = 0.3f * rms_adapt_ +  noise_level_;
+        decision_threshold_ = (.90f * decision_threshold_) +
+                              (0.10f * target_threshold);
+      }
+    }
+
+    // Set a floor
+    if (decision_threshold_ < params_.min_decision_threshold())
+      decision_threshold_ = params_.min_decision_threshold();
+  }
+
+  // Update speech and noise levels.
+  UpdateLevels(rms);
+  ++frame_counter_;
+
+  if (rms_out)
+    *rms_out = GetDecibel(rms);
+}
+
+float EnergyEndpointer::GetNoiseLevelDb() const {
+  return GetDecibel(noise_level_);
+}
+
+void EnergyEndpointer::UpdateLevels(float rms) {
+  // Update quickly initially. We assume this is noise and that
+  // speech is 6dB above the noise.
+  if (frame_counter_ < fast_update_frames_) {
+    // Alpha increases from 0 to (k-1)/k where k is the number of time
+    // steps in the initial adaptation period.
+    float alpha = static_cast<float>(frame_counter_) /
+        static_cast<float>(fast_update_frames_);
+    noise_level_ = (alpha * noise_level_) + ((1 - alpha) * rms);
+    DVLOG(1) << "FAST UPDATE, frame_counter_ " << frame_counter_
+             << ", fast_update_frames_ " << fast_update_frames_;
+  } else {
+    // Update Noise level. The noise level adapts quickly downward, but
+    // slowly upward. The noise_level_ parameter is not currently used
+    // for threshold adaptation. It is used for UI feedback.
+    if (noise_level_ < rms)
+      noise_level_ = (0.999f * noise_level_) + (0.001f * rms);
+    else
+      noise_level_ = (0.95f * noise_level_) + (0.05f * rms);
+  }
+  if (estimating_environment_ || (frame_counter_ < fast_update_frames_)) {
+    decision_threshold_ = noise_level_ * 2; // 6dB above noise level.
+    // Set a floor
+    if (decision_threshold_ < params_.min_decision_threshold())
+      decision_threshold_ = params_.min_decision_threshold();
+  }
+}
+
+EpStatus EnergyEndpointer::Status(int64_t* status_time) const {
+  *status_time = history_->EndTime();
+  return status_;
+}
+
+}  // namespace content

diff --git a/src/content/browser/speech/endpointer/energy_endpointer.h b/src/content/browser/speech/endpointer/energy_endpointer.h
new file mode 100644
index 0000000..7b0b292
--- /dev/null
+++ b/src/content/browser/speech/endpointer/energy_endpointer.h

@@ -0,0 +1,161 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// The EnergyEndpointer class finds likely speech onset and offset points.
+//
+// The implementation described here is about the simplest possible.
+// It is based on timings of threshold crossings for overall signal
+// RMS. It is suitable for light weight applications.
+//
+// As written, the basic idea is that one specifies intervals that
+// must be occupied by super- and sub-threshold energy levels, and
+// defers decisions re onset and offset times until these
+// specifications have been met.  Three basic intervals are tested: an
+// onset window, a speech-on window, and an offset window.  We require
+// super-threshold to exceed some mimimum total durations in the onset
+// and speech-on windows before declaring the speech onset time, and
+// we specify a required sub-threshold residency in the offset window
+// before declaring speech offset. As the various residency requirements are
+// met, the EnergyEndpointer instance assumes various states, and can return the
+// ID of these states to the client (see EpStatus below).
+//
+// The levels of the speech and background noise are continuously updated. It is
+// important that the background noise level be estimated initially for
+// robustness in noisy conditions. The first frames are assumed to be background
+// noise and a fast update rate is used for the noise level. The duration for
+// fast update is controlled by the fast_update_dur_ paramter.
+//
+// If used in noisy conditions, the endpointer should be started and run in the
+// EnvironmentEstimation mode, for at least 200ms, before switching to
+// UserInputMode.
+// Audio feedback contamination can appear in the input audio, if not cut
+// out or handled by echo cancellation. Audio feedback can trigger a false
+// accept. The false accepts can be ignored by setting
+// ep_contamination_rejection_period.
+
+#ifndef CONTENT_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_H_
+#define CONTENT_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_H_
+
+#include <stdint.h>
+
+#include <memory>
+#include <vector>
+
+#include "base/macros.h"
+#include "content/browser/speech/endpointer/energy_endpointer_params.h"
+#include "content/common/content_export.h"
+
+namespace content {
+
+// Endpointer status codes
+enum EpStatus {
+  EP_PRE_SPEECH = 10,
+  EP_POSSIBLE_ONSET,
+  EP_SPEECH_PRESENT,
+  EP_POSSIBLE_OFFSET,
+  EP_POST_SPEECH,
+};
+
+class CONTENT_EXPORT EnergyEndpointer {
+ public:
+  // The default construction MUST be followed by Init(), before any
+  // other use can be made of the instance.
+  EnergyEndpointer();
+  virtual ~EnergyEndpointer();
+
+  void Init(const EnergyEndpointerParams& params);
+
+  // Start the endpointer. This should be called at the beginning of a session.
+  void StartSession();
+
+  // Stop the endpointer.
+  void EndSession();
+
+  // Start environment estimation. Audio will be used for environment estimation
+  // i.e. noise level estimation.
+  void SetEnvironmentEstimationMode();
+
+  // Start user input. This should be called when the user indicates start of
+  // input, e.g. by pressing a button.
+  void SetUserInputMode();
+
+  // Computes the next input frame and modifies EnergyEndpointer status as
+  // appropriate based on the computation.
+  void ProcessAudioFrame(int64_t time_us,
+                         const int16_t* samples,
+                         int num_samples,
+                         float* rms_out);
+
+  // Returns the current state of the EnergyEndpointer and the time
+  // corresponding to the most recently computed frame.
+  EpStatus Status(int64_t* status_time_us) const;
+
+  bool estimating_environment() const {
+    return estimating_environment_;
+  }
+
+  // Returns estimated noise level in dB.
+  float GetNoiseLevelDb() const;
+
+ private:
+  class HistoryRing;
+
+  // Resets the endpointer internal state.  If reset_threshold is true, the
+  // state will be reset completely, including adaptive thresholds and the
+  // removal of all history information.
+  void Restart(bool reset_threshold);
+
+  // Update internal speech and noise levels.
+  void UpdateLevels(float rms);
+
+  // Returns the number of frames (or frame number) corresponding to
+  // the 'time' (in seconds).
+  int TimeToFrame(float time) const;
+
+  EpStatus status_;  // The current state of this instance.
+  float offset_confirm_dur_sec_;  // max on time allowed to confirm POST_SPEECH
+  int64_t
+      endpointer_time_us_;  // Time of the most recently received audio frame.
+  int64_t
+      fast_update_frames_;  // Number of frames for initial level adaptation.
+  int64_t
+      frame_counter_;     // Number of frames seen. Used for initial adaptation.
+  float max_window_dur_;  // Largest search window size (seconds)
+  float sample_rate_;  // Sampling rate.
+
+  // Ring buffers to hold the speech activity history.
+  std::unique_ptr<HistoryRing> history_;
+
+  // Configuration parameters.
+  EnergyEndpointerParams params_;
+
+  // RMS which must be exceeded to conclude frame is speech.
+  float decision_threshold_;
+
+  // Flag to indicate that audio should be used to estimate environment, prior
+  // to receiving user input.
+  bool estimating_environment_;
+
+  // Estimate of the background noise level. Used externally for UI feedback.
+  float noise_level_;
+
+  // An adaptive threshold used to update decision_threshold_ when appropriate.
+  float rms_adapt_;
+
+  // Start lag corresponds to the highest fundamental frequency.
+  int start_lag_;
+
+  // End lag corresponds to the lowest fundamental frequency.
+  int end_lag_;
+
+  // Time when mode switched from environment estimation to user input. This
+  // is used to time forced rejection of audio feedback contamination.
+  int64_t user_input_start_time_us_;
+
+  DISALLOW_COPY_AND_ASSIGN(EnergyEndpointer);
+};
+
+}  // namespace content
+
+#endif  // CONTENT_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_H_

diff --git a/src/content/browser/speech/endpointer/energy_endpointer_params.cc b/src/content/browser/speech/endpointer/energy_endpointer_params.cc
new file mode 100644
index 0000000..9cdf024
--- /dev/null
+++ b/src/content/browser/speech/endpointer/energy_endpointer_params.cc

@@ -0,0 +1,53 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "content/browser/speech/endpointer/energy_endpointer_params.h"
+
+namespace content {
+
+EnergyEndpointerParams::EnergyEndpointerParams() {
+  SetDefaults();
+}
+
+void EnergyEndpointerParams::SetDefaults() {
+  frame_period_ = 0.01f;
+  frame_duration_ = 0.01f;
+  endpoint_margin_ = 0.2f;
+  onset_window_ = 0.15f;
+  speech_on_window_ = 0.4f;
+  offset_window_ = 0.15f;
+  onset_detect_dur_ = 0.09f;
+  onset_confirm_dur_ = 0.075f;
+  on_maintain_dur_ = 0.10f;
+  offset_confirm_dur_ = 0.12f;
+  decision_threshold_ = 150.0f;
+  min_decision_threshold_ = 50.0f;
+  fast_update_dur_ = 0.2f;
+  sample_rate_ = 8000.0f;
+  min_fundamental_frequency_ = 57.143f;
+  max_fundamental_frequency_ = 400.0f;
+  contamination_rejection_period_ = 0.25f;
+}
+
+void EnergyEndpointerParams::operator=(const EnergyEndpointerParams& source) {
+  frame_period_ = source.frame_period();
+  frame_duration_ = source.frame_duration();
+  endpoint_margin_ = source.endpoint_margin();
+  onset_window_ = source.onset_window();
+  speech_on_window_ = source.speech_on_window();
+  offset_window_ = source.offset_window();
+  onset_detect_dur_ = source.onset_detect_dur();
+  onset_confirm_dur_ = source.onset_confirm_dur();
+  on_maintain_dur_ = source.on_maintain_dur();
+  offset_confirm_dur_ = source.offset_confirm_dur();
+  decision_threshold_ = source.decision_threshold();
+  min_decision_threshold_ = source.min_decision_threshold();
+  fast_update_dur_ = source.fast_update_dur();
+  sample_rate_ = source.sample_rate();
+  min_fundamental_frequency_ = source.min_fundamental_frequency();
+  max_fundamental_frequency_ = source.max_fundamental_frequency();
+  contamination_rejection_period_ = source.contamination_rejection_period();
+}
+
+}  //  namespace content

diff --git a/src/content/browser/speech/endpointer/energy_endpointer_params.h b/src/content/browser/speech/endpointer/energy_endpointer_params.h
new file mode 100644
index 0000000..1510435
--- /dev/null
+++ b/src/content/browser/speech/endpointer/energy_endpointer_params.h

@@ -0,0 +1,137 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CONTENT_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_PARAMS_H_
+#define CONTENT_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_PARAMS_H_
+
+#include "content/common/content_export.h"
+
+namespace content {
+
+// Input parameters for the EnergyEndpointer class.
+class CONTENT_EXPORT EnergyEndpointerParams {
+ public:
+  EnergyEndpointerParams();
+
+  void SetDefaults();
+
+  void operator=(const EnergyEndpointerParams& source);
+
+  // Accessors and mutators
+  float frame_period() const { return frame_period_; }
+  void set_frame_period(float frame_period) {
+    frame_period_ = frame_period;
+  }
+
+  float frame_duration() const { return frame_duration_; }
+  void set_frame_duration(float frame_duration) {
+    frame_duration_ = frame_duration;
+  }
+
+  float endpoint_margin() const { return endpoint_margin_; }
+  void set_endpoint_margin(float endpoint_margin) {
+    endpoint_margin_ = endpoint_margin;
+  }
+
+  float onset_window() const { return onset_window_; }
+  void set_onset_window(float onset_window) { onset_window_ = onset_window; }
+
+  float speech_on_window() const { return speech_on_window_; }
+  void set_speech_on_window(float speech_on_window) {
+    speech_on_window_ = speech_on_window;
+  }
+
+  float offset_window() const { return offset_window_; }
+  void set_offset_window(float offset_window) {
+    offset_window_ = offset_window;
+  }
+
+  float onset_detect_dur() const { return onset_detect_dur_; }
+  void set_onset_detect_dur(float onset_detect_dur) {
+    onset_detect_dur_ = onset_detect_dur;
+  }
+
+  float onset_confirm_dur() const { return onset_confirm_dur_; }
+  void set_onset_confirm_dur(float onset_confirm_dur) {
+    onset_confirm_dur_ = onset_confirm_dur;
+  }
+
+  float on_maintain_dur() const { return on_maintain_dur_; }
+  void set_on_maintain_dur(float on_maintain_dur) {
+    on_maintain_dur_ = on_maintain_dur;
+  }
+
+  float offset_confirm_dur() const { return offset_confirm_dur_; }
+  void set_offset_confirm_dur(float offset_confirm_dur) {
+    offset_confirm_dur_ = offset_confirm_dur;
+  }
+
+  float decision_threshold() const { return decision_threshold_; }
+  void set_decision_threshold(float decision_threshold) {
+    decision_threshold_ = decision_threshold;
+  }
+
+  float min_decision_threshold() const { return min_decision_threshold_; }
+  void set_min_decision_threshold(float min_decision_threshold) {
+    min_decision_threshold_ = min_decision_threshold;
+  }
+
+  float fast_update_dur() const { return fast_update_dur_; }
+  void set_fast_update_dur(float fast_update_dur) {
+    fast_update_dur_ = fast_update_dur;
+  }
+
+  float sample_rate() const { return sample_rate_; }
+  void set_sample_rate(float sample_rate) { sample_rate_ = sample_rate; }
+
+  float min_fundamental_frequency() const { return min_fundamental_frequency_; }
+  void set_min_fundamental_frequency(float min_fundamental_frequency) {
+    min_fundamental_frequency_ = min_fundamental_frequency;
+  }
+
+  float max_fundamental_frequency() const { return max_fundamental_frequency_; }
+  void set_max_fundamental_frequency(float max_fundamental_frequency) {
+    max_fundamental_frequency_ = max_fundamental_frequency;
+  }
+
+  float contamination_rejection_period() const {
+    return contamination_rejection_period_;
+  }
+  void set_contamination_rejection_period(
+      float contamination_rejection_period) {
+    contamination_rejection_period_ = contamination_rejection_period;
+  }
+
+ private:
+  float frame_period_;  // Frame period
+  float frame_duration_;  // Window size
+  float onset_window_;  // Interval scanned for onset activity
+  float speech_on_window_;  // Inverval scanned for ongoing speech
+  float offset_window_;  // Interval scanned for offset evidence
+  float offset_confirm_dur_;  // Silence duration required to confirm offset
+  float decision_threshold_;  // Initial rms detection threshold
+  float min_decision_threshold_;  // Minimum rms detection threshold
+  float fast_update_dur_;  // Period for initial estimation of levels.
+  float sample_rate_;  // Expected sample rate.
+
+  // Time to add on either side of endpoint threshold crossings
+  float endpoint_margin_;
+  // Total dur within onset_window required to enter ONSET state
+  float onset_detect_dur_;
+  // Total on time within onset_window required to enter SPEECH_ON state
+  float onset_confirm_dur_;
+  // Minimum dur in SPEECH_ON state required to maintain ON state
+  float on_maintain_dur_;
+  // Minimum fundamental frequency for autocorrelation.
+  float min_fundamental_frequency_;
+  // Maximum fundamental frequency for autocorrelation.
+  float max_fundamental_frequency_;
+  // Period after start of user input that above threshold values are ignored.
+  // This is to reject audio feedback contamination.
+  float contamination_rejection_period_;
+};
+
+}  //  namespace content
+
+#endif  // CONTENT_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_PARAMS_H_
commit	878ef3e23600a250d4cfb86f37d447f3ed666ca9	[log] [tgz]
author	David Ghandehari <iffy@google.com>	Sun Nov 13 21:06:36 2016 -0800
committer	David Ghandehari <iffy@google.com>	Sun Nov 13 21:06:36 2016 -0800
tree	e32d35b879b957dcb8bf49e8aff95e9e7090b314
parent	07c204f02a8437b2b0d3a3536a986b4ee0f3c2e1 [diff]