media/mojo/mojom/speech_recognition_result.h - cobalt - Git at Google

 // Copyright 2021 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #ifndef MEDIA_MOJO_MOJOM_SPEECH_RECOGNITION_RESULT_H_
 #define MEDIA_MOJO_MOJOM_SPEECH_RECOGNITION_RESULT_H_

 #include <string>
 #include <vector>

 #include "base/time/time.h"
 #include "third_party/abseil-cpp/absl/types/optional.h"

 namespace media {

 struct HypothesisParts {
   HypothesisParts();
   HypothesisParts(const std::vector<std::string> part, base::TimeDelta offset);
   HypothesisParts(const HypothesisParts&);
   HypothesisParts(HypothesisParts&&);
   HypothesisParts& operator=(const HypothesisParts&);
   HypothesisParts& operator=(HypothesisParts&&);
   ~HypothesisParts();

   bool operator==(const HypothesisParts& rhs) const;

   // A section of the final transcription text. Either an entire word or single
   // character (depending on the language) with adjacent punctuation. There will
   // usually only be one value here. If formatting is enabled in the speech
   // recognition, then the raw text will be included as the second element.
   std::vector<std::string> text;

   // Time offset from this event's |audio_start_time| defined below. Time
   // offset from this event's |audio_start_time| defined below. We enforce the
   // following invariant: 0 <= hypothesis_part_offset < |audio_end_time -
   // audio_start_time|.
   base::TimeDelta hypothesis_part_offset;
 };

 struct TimingInformation {
   TimingInformation();
   TimingInformation(const TimingInformation&);
   TimingInformation(TimingInformation&&);
   TimingInformation& operator=(const TimingInformation&);
   TimingInformation& operator=(TimingInformation&&);
   ~TimingInformation();

   bool operator==(const TimingInformation& rhs) const;

   // Start time in audio time from the start of the SODA session.
   // This time measures the amount of audio input into SODA.
   base::TimeDelta audio_start_time;

   // Elapsed processed audio from first frame after preamble.
   base::TimeDelta audio_end_time;

   // The timing information for each word/letter in the transription.
   // HypothesisPartsInResult was introduced in min version 1 in
   // chromeos/services/machine_learning/public/mojom/soda.mojom. Therefore, it
   // must be optional. Hypothesis parts maybe non-empty optional containing a
   // zero length vector if no words were spoken during the event's time span.
   absl::optional<std::vector<HypothesisParts>> hypothesis_parts;
 };

 // A speech recognition result created by the speech service and passed to the
 // SpeechRecognitionRecognizerClient.
 struct SpeechRecognitionResult {
   SpeechRecognitionResult();
   SpeechRecognitionResult(const std::string transcript, bool is_final);
   SpeechRecognitionResult(const SpeechRecognitionResult&);
   SpeechRecognitionResult(SpeechRecognitionResult&&);
   SpeechRecognitionResult& operator=(const SpeechRecognitionResult&);
   SpeechRecognitionResult& operator=(SpeechRecognitionResult&&);
   ~SpeechRecognitionResult();

   bool operator==(const SpeechRecognitionResult& rhs) const;

   std::string transcription;

   // A flag indicating whether the result is final. If true, the result is
   // locked in and the next result returned will not overlap with the previous
   // final result.
   bool is_final = false;

   // Timing information for the current transcription.
   absl::optional<TimingInformation> timing_information;
 };

 }  // namespace media

 #endif  // MEDIA_MOJO_MOJOM_SPEECH_RECOGNITION_RESULT_H_
	// Copyright 2021 The Chromium Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#ifndef MEDIA_MOJO_MOJOM_SPEECH_RECOGNITION_RESULT_H_
	#define MEDIA_MOJO_MOJOM_SPEECH_RECOGNITION_RESULT_H_

	#include <string>
	#include <vector>

	#include "base/time/time.h"
	#include "third_party/abseil-cpp/absl/types/optional.h"

	namespace media {

	struct HypothesisParts {
	HypothesisParts();
	HypothesisParts(const std::vector<std::string> part, base::TimeDelta offset);
	HypothesisParts(const HypothesisParts&);
	HypothesisParts(HypothesisParts&&);
	HypothesisParts& operator=(const HypothesisParts&);
	HypothesisParts& operator=(HypothesisParts&&);
	~HypothesisParts();

	bool operator==(const HypothesisParts& rhs) const;

	// A section of the final transcription text. Either an entire word or single
	// character (depending on the language) with adjacent punctuation. There will
	// usually only be one value here. If formatting is enabled in the speech
	// recognition, then the raw text will be included as the second element.
	std::vector<std::string> text;

	// Time offset from this event's \|audio_start_time\| defined below. Time
	// offset from this event's \|audio_start_time\| defined below. We enforce the
	// following invariant: 0 <= hypothesis_part_offset < \|audio_end_time -
	// audio_start_time\|.
	base::TimeDelta hypothesis_part_offset;
	};

	struct TimingInformation {
	TimingInformation();
	TimingInformation(const TimingInformation&);
	TimingInformation(TimingInformation&&);
	TimingInformation& operator=(const TimingInformation&);
	TimingInformation& operator=(TimingInformation&&);
	~TimingInformation();

	bool operator==(const TimingInformation& rhs) const;

	// Start time in audio time from the start of the SODA session.
	// This time measures the amount of audio input into SODA.
	base::TimeDelta audio_start_time;

	// Elapsed processed audio from first frame after preamble.
	base::TimeDelta audio_end_time;

	// The timing information for each word/letter in the transription.
	// HypothesisPartsInResult was introduced in min version 1 in
	// chromeos/services/machine_learning/public/mojom/soda.mojom. Therefore, it
	// must be optional. Hypothesis parts maybe non-empty optional containing a
	// zero length vector if no words were spoken during the event's time span.
	absl::optional<std::vector<HypothesisParts>> hypothesis_parts;
	};

	// A speech recognition result created by the speech service and passed to the
	// SpeechRecognitionRecognizerClient.
	struct SpeechRecognitionResult {
	SpeechRecognitionResult();
	SpeechRecognitionResult(const std::string transcript, bool is_final);
	SpeechRecognitionResult(const SpeechRecognitionResult&);
	SpeechRecognitionResult(SpeechRecognitionResult&&);
	SpeechRecognitionResult& operator=(const SpeechRecognitionResult&);
	SpeechRecognitionResult& operator=(SpeechRecognitionResult&&);
	~SpeechRecognitionResult();

	bool operator==(const SpeechRecognitionResult& rhs) const;

	std::string transcription;

	// A flag indicating whether the result is final. If true, the result is
	// locked in and the next result returned will not overlap with the previous
	// final result.
	bool is_final = false;

	// Timing information for the current transcription.
	absl::optional<TimingInformation> timing_information;
	};

	} // namespace media

	#endif // MEDIA_MOJO_MOJOM_SPEECH_RECOGNITION_RESULT_H_