| /* |
| * Copyright 2016 Google Inc. All Rights Reserved. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include "cobalt/speech/speech_recognizer.h" |
| |
| #include "base/bind.h" |
| #include "base/rand_util.h" |
| #include "base/string_number_conversions.h" |
| #include "base/string_util.h" |
| #include "base/utf_string_conversions.h" |
| #include "cobalt/loader/fetcher_factory.h" |
| #include "cobalt/network/network_module.h" |
| #include "net/base/escape.h" |
| #include "net/url_request/url_fetcher.h" |
| |
| namespace cobalt { |
| namespace speech { |
| |
| namespace { |
| const char kBaseStreamURL[] = |
| "https://www.google.com/speech-api/full-duplex/v1"; |
| // TODO: hide this key to somewhere else. |
| const char kSpeechAPIKey[] = ""; |
| const char kUp[] = "up"; |
| const char kDown[] = "down"; |
| const char kClient[] = "com.speech.tv"; |
| |
| GURL AppendPath(const GURL& url, const std::string& value) { |
| std::string path(url.path()); |
| |
| if (!path.empty()) path += "/"; |
| |
| path += net::EscapePath(value); |
| GURL::Replacements replacements; |
| replacements.SetPathStr(path); |
| return url.ReplaceComponents(replacements); |
| } |
| |
| GURL AppendQueryParameter(const GURL& url, const std::string& new_query, |
| const std::string& value) { |
| std::string query(url.query()); |
| |
| if (!query.empty()) query += "&"; |
| |
| query += net::EscapeQueryParamValue(new_query, true); |
| |
| if (!value.empty()) { |
| query += "=" + net::EscapeQueryParamValue(value, true); |
| } |
| |
| GURL::Replacements replacements; |
| replacements.SetQueryStr(query); |
| return url.ReplaceComponents(replacements); |
| } |
| |
| } // namespace |
| |
| SpeechRecognizer::SpeechRecognizer(network::NetworkModule* network_module, |
| const ResultCallback& result_callback, |
| const ErrorCallback& error_callback) |
| : network_module_(network_module), |
| thread_("speech_recognizer"), |
| started_(false), |
| result_callback_(result_callback), |
| error_callback_(error_callback) { |
| thread_.StartWithOptions(base::Thread::Options(MessageLoop::TYPE_IO, 0)); |
| } |
| |
| SpeechRecognizer::~SpeechRecognizer() { |
| Stop(); |
| } |
| |
| void SpeechRecognizer::Start(const SpeechRecognitionConfig& config, |
| int sample_rate) { |
| // Called by the speech recognition manager thread. |
| thread_.message_loop()->PostTask( |
| FROM_HERE, base::Bind(&SpeechRecognizer::StartInternal, |
| base::Unretained(this), config, sample_rate)); |
| } |
| |
| void SpeechRecognizer::Stop() { |
| // Called by the speech recognition manager thread. |
| thread_.message_loop()->PostTask( |
| FROM_HERE, |
| base::Bind(&SpeechRecognizer::StopInternal, base::Unretained(this))); |
| } |
| |
| void SpeechRecognizer::RecognizeAudio(scoped_ptr<AudioBus> audio_bus, |
| bool is_last_chunk) { |
| // Called by the speech recognition manager thread. |
| thread_.message_loop()->PostTask( |
| FROM_HERE, base::Bind(&SpeechRecognizer::UploadAudioDataInternal, |
| base::Unretained(this), base::Passed(&audio_bus), |
| is_last_chunk)); |
| } |
| |
| void SpeechRecognizer::OnURLFetchDownloadData( |
| const net::URLFetcher* source, scoped_ptr<std::string> download_data) { |
| DCHECK_EQ(thread_.message_loop(), MessageLoop::current()); |
| // TODO: Parse the serialized protocol buffers data. |
| NOTIMPLEMENTED(); |
| |
| UNREFERENCED_PARAMETER(source); |
| UNREFERENCED_PARAMETER(download_data); |
| } |
| |
| void SpeechRecognizer::OnURLFetchComplete(const net::URLFetcher* source) { |
| DCHECK_EQ(thread_.message_loop(), MessageLoop::current()); |
| UNREFERENCED_PARAMETER(source); |
| started_ = false; |
| } |
| |
| void SpeechRecognizer::StartInternal(const SpeechRecognitionConfig& config, |
| int sample_rate) { |
| DCHECK_EQ(thread_.message_loop(), MessageLoop::current()); |
| |
| if (started_) { |
| // Recognizer is already started. |
| return; |
| } |
| started_ = true; |
| |
| encoder_.reset(new AudioEncoderFlac(sample_rate)); |
| |
| // Required for streaming on both up and down connections. |
| std::string pair = base::Uint64ToString(base::RandUint64()); |
| |
| // Set up down stream first. |
| GURL down_url(kBaseStreamURL); |
| down_url = AppendPath(down_url, kDown); |
| down_url = AppendQueryParameter(down_url, "pair", pair); |
| // Use protobuffer as the output format. |
| down_url = AppendQueryParameter(down_url, "output", "pb"); |
| |
| downstream_fetcher_.reset( |
| net::URLFetcher::Create(down_url, net::URLFetcher::GET, this)); |
| downstream_fetcher_->SetRequestContext( |
| network_module_->url_request_context_getter()); |
| downstream_fetcher_->Start(); |
| |
| // Up stream. |
| GURL up_url(kBaseStreamURL); |
| up_url = AppendPath(up_url, kUp); |
| up_url = AppendQueryParameter(up_url, "client", kClient); |
| up_url = AppendQueryParameter(up_url, "pair", pair); |
| up_url = AppendQueryParameter(up_url, "output", "pb"); |
| up_url = AppendQueryParameter(up_url, "key", kSpeechAPIKey); |
| |
| if (!config.lang.empty()) { |
| up_url = AppendQueryParameter(up_url, "lang", config.lang); |
| } |
| |
| if (config.max_alternatives) { |
| up_url = AppendQueryParameter(up_url, "maxAlternatives", |
| base::UintToString(config.max_alternatives)); |
| } |
| |
| if (config.continuous) { |
| up_url = AppendQueryParameter(up_url, "continuous", ""); |
| } |
| if (config.interim_results) { |
| up_url = AppendQueryParameter(up_url, "interim", ""); |
| } |
| |
| upstream_fetcher_.reset( |
| net::URLFetcher::Create(up_url, net::URLFetcher::POST, this)); |
| upstream_fetcher_->SetRequestContext( |
| network_module_->url_request_context_getter()); |
| upstream_fetcher_->SetChunkedUpload(encoder_->GetMimeType()); |
| upstream_fetcher_->Start(); |
| } |
| |
| void SpeechRecognizer::StopInternal() { |
| DCHECK_EQ(thread_.message_loop(), MessageLoop::current()); |
| |
| if (!started_) { |
| // Recognizer is not started. |
| return; |
| } |
| started_ = false; |
| |
| upstream_fetcher_.reset(); |
| downstream_fetcher_.reset(); |
| encoder_.reset(); |
| } |
| |
| void SpeechRecognizer::UploadAudioDataInternal(scoped_ptr<AudioBus> audio_bus, |
| bool is_last_chunk) { |
| DCHECK_EQ(thread_.message_loop(), MessageLoop::current()); |
| DCHECK(audio_bus); |
| |
| std::string encoded_audio_data; |
| if (encoder_) { |
| encoder_->Encode(audio_bus.get()); |
| if (is_last_chunk) { |
| encoder_->Finish(); |
| } |
| encoded_audio_data = encoder_->GetAndClearAvailableEncodedData(); |
| } |
| |
| if (upstream_fetcher_ && !encoded_audio_data.empty()) { |
| upstream_fetcher_->AppendChunkToUpload(encoded_audio_data, is_last_chunk); |
| } |
| } |
| |
| } // namespace speech |
| } // namespace cobalt |