| // Copyright 2014 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #ifndef MEDIA_FORMATS_MP4_MP4_STREAM_PARSER_H_ |
| #define MEDIA_FORMATS_MP4_MP4_STREAM_PARSER_H_ |
| |
| #include <stdint.h> |
| |
| #include <memory> |
| #include <set> |
| #include <vector> |
| |
| #include "base/compiler_specific.h" |
| #include "base/functional/callback.h" |
| #include "base/memory/raw_ptr.h" |
| #include "media/base/media_export.h" |
| #include "media/base/stream_parser.h" |
| #include "media/formats/common/offset_byte_queue.h" |
| #include "media/formats/mp4/parse_result.h" |
| #include "media/formats/mp4/track_run_iterator.h" |
| |
| #if BUILDFLAG(USE_PROPRIETARY_CODECS) |
| #include "media/formats/mp4/aac.h" |
| #endif |
| |
| namespace media::mp4 { |
| |
| struct Movie; |
| struct MovieHeader; |
| struct TrackHeader; |
| class BoxReader; |
| |
| class MEDIA_EXPORT MP4StreamParser : public StreamParser { |
| public: |
| MP4StreamParser(const std::set<int>& audio_object_types, |
| bool has_sbr, |
| bool has_flac, |
| bool has_iamf); |
| |
| MP4StreamParser(const MP4StreamParser&) = delete; |
| MP4StreamParser& operator=(const MP4StreamParser&) = delete; |
| |
| ~MP4StreamParser() override; |
| |
| void Init(InitCB init_cb, |
| NewConfigCB config_cb, |
| NewBuffersCB new_buffers_cb, |
| bool ignore_text_tracks, |
| EncryptedMediaInitDataCB encrypted_media_init_data_cb, |
| NewMediaSegmentCB new_segment_cb, |
| EndMediaSegmentCB end_of_segment_cb, |
| MediaLog* media_log) override; |
| void Flush() override; |
| bool GetGenerateTimestampsFlag() const override; |
| [[nodiscard]] bool AppendToParseBuffer(const uint8_t* buf, |
| size_t size) override; |
| [[nodiscard]] ParseStatus Parse(int max_pending_bytes_to_inspect) override; |
| |
| // Calculates the rotation value from the track header display matricies. |
| VideoTransformation CalculateRotation(const TrackHeader& track, |
| const MovieHeader& movie); |
| |
| private: |
| enum State { |
| kWaitingForInit, |
| kParsingBoxes, |
| kWaitingForSampleData, |
| kEmittingSamples, |
| kError |
| }; |
| |
| // Wrappers of `queue_` that observe constraint of `max_parse_offset_`. |
| void ModulatedPeek(const uint8_t** buf, int* size); |
| void ModulatedPeekAt(int64_t offset, const uint8_t** buf, int* size); |
| bool ModulatedTrim(int64_t max_offset); |
| |
| ParseResult ParseBox(); |
| bool ParseMoov(mp4::BoxReader* reader); |
| bool ParseMoof(mp4::BoxReader* reader); |
| |
| void OnEncryptedMediaInitData( |
| const std::vector<ProtectionSystemSpecificHeader>& headers); |
| |
| // To retain proper framing, each 'mdat' atom must be read; to limit memory |
| // usage, the atom's data needs to be discarded incrementally as frames are |
| // extracted from the stream. This function discards data from the stream up |
| // to |max_clear_offset|, updating the |mdat_tail_| value so that framing can |
| // be retained after all 'mdat' information has been read. |max_clear_offset| |
| // is the upper bound on what can be removed from |queue_|. Anything below |
| // this offset is no longer needed by the parser. |
| // Returns 'true' on success, 'false' if there was an error. |
| bool ReadAndDiscardMDATsUntil(int64_t max_clear_offset); |
| |
| void ChangeState(State new_state); |
| |
| bool EmitConfigs(); |
| #if BUILDFLAG(USE_PROPRIETARY_CODECS) |
| bool PrepareAACBuffer(const AAC& aac_config, |
| std::vector<uint8_t>* frame_buf, |
| std::vector<SubsampleEntry>* subsamples) const; |
| #endif |
| #if BUILDFLAG(ENABLE_PLATFORM_IAMF_AUDIO) |
| bool PrependIADescriptors(const IamfSpecificBox& iamf_box, |
| std::vector<uint8_t>* frame_buf, |
| std::vector<SubsampleEntry>* subsamples) const; |
| #endif // BUILDFLAG(ENABLE_PLATFORM_IAMF_AUDIO) |
| ParseResult EnqueueSample(BufferQueueMap* buffers); |
| bool SendAndFlushSamples(BufferQueueMap* buffers); |
| |
| void Reset(); |
| |
| // Checks to see if we have enough data in |queue_| to transition to |
| // kEmittingSamples and start enqueuing samples. |
| bool HaveEnoughDataToEnqueueSamples(); |
| |
| // Sets |highest_end_offset_| based on the data in |moov_| |
| // and |moof|. Returns true if |highest_end_offset_| was successfully |
| // computed. |
| bool ComputeHighestEndOffset(const MovieFragment& moof); |
| |
| State state_; |
| InitCB init_cb_; |
| NewConfigCB config_cb_; |
| NewBuffersCB new_buffers_cb_; |
| EncryptedMediaInitDataCB encrypted_media_init_data_cb_; |
| NewMediaSegmentCB new_segment_cb_; |
| EndMediaSegmentCB end_of_segment_cb_; |
| raw_ptr<MediaLog> media_log_; |
| |
| // Bytes of the mp4 stream. |
| // `max_parse_offset_` tracks the point in `queue_` beyond which no data may |
| // yet be parsed even if it is less than the queue's tail offset. This allows |
| // incremental parsing. `max_parse_offset_` must be less than or equal to the |
| // queue_'s current tail offset. Note that operations like Trim() and PeekAt() |
| // on the offset queue can involve offsets beyond tail or `max_parse_offset_`, |
| // so this parser must consider `max_parse_offset_` too when using those |
| // operations, otherwise more data than the amount indicated in the Parse() |
| // call's `max_pending_bytes_to_inspect` increment might be inspected in a |
| // Parse() call. See the various Modulated*() wrappers in this class. |
| // TODO(https://crbug.com/1286464): Consider reworking all these parsers to |
| // use a new type of queue that internally modulates the increment. |
| int64_t max_parse_offset_ = 0; |
| OffsetByteQueue queue_; |
| |
| // These two parameters are only valid in the |kEmittingSegments| state. |
| // |
| // |moof_head_| is the offset of the start of the most recently parsed moof |
| // block. All byte offsets in sample information are relative to this offset, |
| // as mandated by the Media Source spec. |
| int64_t moof_head_; |
| // |mdat_tail_| is the stream offset of the end of the current 'mdat' box. |
| // Valid iff it is greater than the head of the queue. |
| int64_t mdat_tail_; |
| |
| // The highest end offset in the current moof. This offset is |
| // relative to |moof_head_|. This value is used to make sure we have collected |
| // enough bytes to parse all samples and aux_info in the current moof. |
| int64_t highest_end_offset_; |
| |
| std::unique_ptr<mp4::Movie> moov_; |
| std::unique_ptr<mp4::TrackRunIterator> runs_; |
| |
| bool has_audio_; |
| bool has_video_; |
| std::set<uint32_t> audio_track_ids_; |
| std::set<uint32_t> video_track_ids_; |
| // The object types allowed for audio tracks. For FLAC indication, use |
| // |has_flac_|; |
| const std::set<int> audio_object_types_; |
| const bool has_sbr_; |
| const bool has_flac_; |
| const bool has_iamf_; |
| |
| // Tracks the number of MEDIA_LOGS for skipping empty trun samples. |
| int num_empty_samples_skipped_; |
| |
| // Tracks the number of MEDIA_LOGS for invalid bitstream conversion. |
| int num_invalid_conversions_; |
| |
| // Tracks the number of MEDIA_LOGS for video keyframe MP4<->frame mismatch. |
| int num_video_keyframe_mismatches_; |
| }; |
| |
| } // namespace media::mp4 |
| |
| #endif // MEDIA_FORMATS_MP4_MP4_STREAM_PARSER_H_ |