| // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "media/filters/audio_file_reader.h" |
| |
| #include <stddef.h> |
| |
| #include <cmath> |
| #include <memory> |
| #include <vector> |
| |
| #include "base/bind.h" |
| #include "base/callback.h" |
| #include "base/logging.h" |
| #include "base/numerics/safe_math.h" |
| #include "base/time/time.h" |
| #include "media/base/audio_bus.h" |
| #include "media/base/audio_sample_types.h" |
| #include "media/ffmpeg/ffmpeg_common.h" |
| #include "media/ffmpeg/ffmpeg_decoding_loop.h" |
| |
| namespace media { |
| |
// AAC (M4A) specific frame counts. GetDuration() adds their sum, converted to
// microseconds at the stream's sample rate, to FFmpeg's duration estimate.
constexpr int kAACPrimingFrameCount = 2112;
constexpr int kAACRemainderFrameCount = 519;
| |
| AudioFileReader::AudioFileReader(FFmpegURLProtocol* protocol) |
| : stream_index_(0), |
| protocol_(protocol), |
| audio_codec_(AudioCodec::kUnknown), |
| channels_(0), |
| sample_rate_(0), |
| av_sample_format_(0) {} |
| |
| AudioFileReader::~AudioFileReader() { |
| Close(); |
| } |
| |
| bool AudioFileReader::Open() { |
| return OpenDemuxer() && OpenDecoder(); |
| } |
| |
| bool AudioFileReader::OpenDemuxer() { |
| glue_ = std::make_unique<FFmpegGlue>(protocol_); |
| AVFormatContext* format_context = glue_->format_context(); |
| |
| // Open FFmpeg AVFormatContext. |
| if (!glue_->OpenContext()) { |
| DLOG(WARNING) << "AudioFileReader::Open() : error in avformat_open_input()"; |
| return false; |
| } |
| |
| const int result = avformat_find_stream_info(format_context, NULL); |
| if (result < 0) { |
| DLOG(WARNING) |
| << "AudioFileReader::Open() : error in avformat_find_stream_info()"; |
| return false; |
| } |
| |
| // Calling avformat_find_stream_info can uncover new streams. We wait till now |
| // to find the first audio stream, if any. |
| codec_context_.reset(); |
| bool found_stream = false; |
| for (size_t i = 0; i < format_context->nb_streams; ++i) { |
| if (format_context->streams[i]->codecpar->codec_type == |
| AVMEDIA_TYPE_AUDIO) { |
| stream_index_ = i; |
| found_stream = true; |
| break; |
| } |
| } |
| |
| if (!found_stream) |
| return false; |
| |
| // Get the codec context. |
| codec_context_ = |
| AVStreamToAVCodecContext(format_context->streams[stream_index_]); |
| if (!codec_context_) |
| return false; |
| |
| DCHECK_EQ(codec_context_->codec_type, AVMEDIA_TYPE_AUDIO); |
| return true; |
| } |
| |
| bool AudioFileReader::OpenDecoder() { |
| const AVCodec* codec = avcodec_find_decoder(codec_context_->codec_id); |
| if (codec) { |
| // MP3 decodes to S16P which we don't support, tell it to use S16 instead. |
| if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P) |
| codec_context_->request_sample_fmt = AV_SAMPLE_FMT_S16; |
| |
| const int result = avcodec_open2(codec_context_.get(), codec, nullptr); |
| if (result < 0) { |
| DLOG(WARNING) << "AudioFileReader::Open() : could not open codec -" |
| << " result: " << result; |
| return false; |
| } |
| |
| // Ensure avcodec_open2() respected our format request. |
| if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P) { |
| DLOG(ERROR) << "AudioFileReader::Open() : unable to configure a" |
| << " supported sample format - " |
| << codec_context_->sample_fmt; |
| return false; |
| } |
| } else { |
| DLOG(WARNING) << "AudioFileReader::Open() : could not find codec."; |
| return false; |
| } |
| |
| // Verify the channel layout is supported by Chrome. Acts as a sanity check |
| // against invalid files. See http://crbug.com/171962 |
| if (ChannelLayoutToChromeChannelLayout(codec_context_->channel_layout, |
| codec_context_->channels) == |
| CHANNEL_LAYOUT_UNSUPPORTED) { |
| return false; |
| } |
| |
| // Store initial values to guard against midstream configuration changes. |
| channels_ = codec_context_->channels; |
| audio_codec_ = CodecIDToAudioCodec(codec_context_->codec_id); |
| sample_rate_ = codec_context_->sample_rate; |
| av_sample_format_ = codec_context_->sample_fmt; |
| return true; |
| } |
| |
| bool AudioFileReader::HasKnownDuration() const { |
| return glue_->format_context()->duration != AV_NOPTS_VALUE; |
| } |
| |
| void AudioFileReader::Close() { |
| codec_context_.reset(); |
| glue_.reset(); |
| } |
| |
| int AudioFileReader::Read( |
| std::vector<std::unique_ptr<AudioBus>>* decoded_audio_packets, |
| int packets_to_read) { |
| DCHECK(glue_ && codec_context_) |
| << "AudioFileReader::Read() : reader is not opened!"; |
| |
| FFmpegDecodingLoop decode_loop(codec_context_.get()); |
| |
| int total_frames = 0; |
| auto frame_ready_cb = |
| base::BindRepeating(&AudioFileReader::OnNewFrame, base::Unretained(this), |
| &total_frames, decoded_audio_packets); |
| |
| AVPacket packet; |
| int packets_read = 0; |
| while (packets_read++ < packets_to_read && ReadPacket(&packet)) { |
| const auto status = decode_loop.DecodePacket(&packet, frame_ready_cb); |
| av_packet_unref(&packet); |
| |
| if (status != FFmpegDecodingLoop::DecodeStatus::kOkay) |
| break; |
| } |
| |
| return total_frames; |
| } |
| |
| base::TimeDelta AudioFileReader::GetDuration() const { |
| const AVRational av_time_base = {1, AV_TIME_BASE}; |
| |
| DCHECK_NE(glue_->format_context()->duration, AV_NOPTS_VALUE); |
| base::CheckedNumeric<int64_t> estimated_duration_us = |
| glue_->format_context()->duration; |
| |
| if (audio_codec_ == AudioCodec::kAAC) { |
| // For certain AAC-encoded files, FFMPEG's estimated frame count might not |
| // be sufficient to capture the entire audio content that we want. This is |
| // especially noticeable for short files (< 10ms) resulting in silence |
| // throughout the decoded buffer. Thus we add the priming frames and the |
| // remainder frames to the estimation. |
| // (See: crbug.com/513178) |
| estimated_duration_us += ceil( |
| 1000000.0 * |
| static_cast<double>(kAACPrimingFrameCount + kAACRemainderFrameCount) / |
| sample_rate()); |
| } else { |
| // Add one microsecond to avoid rounding-down errors which can occur when |
| // |duration| has been calculated from an exact number of sample-frames. |
| // One microsecond is much less than the time of a single sample-frame |
| // at any real-world sample-rate. |
| estimated_duration_us += 1; |
| } |
| |
| return ConvertFromTimeBase(av_time_base, estimated_duration_us.ValueOrDie()); |
| } |
| |
| int AudioFileReader::GetNumberOfFrames() const { |
| return base::ClampCeil(GetDuration().InSecondsF() * sample_rate()); |
| } |
| |
| bool AudioFileReader::OpenDemuxerForTesting() { |
| return OpenDemuxer(); |
| } |
| |
| bool AudioFileReader::ReadPacketForTesting(AVPacket* output_packet) { |
| return ReadPacket(output_packet); |
| } |
| |
| bool AudioFileReader::ReadPacket(AVPacket* output_packet) { |
| while (av_read_frame(glue_->format_context(), output_packet) >= 0) { |
| // Skip packets from other streams. |
| if (output_packet->stream_index != stream_index_) { |
| av_packet_unref(output_packet); |
| continue; |
| } |
| return true; |
| } |
| return false; |
| } |
| |
| bool AudioFileReader::OnNewFrame( |
| int* total_frames, |
| std::vector<std::unique_ptr<AudioBus>>* decoded_audio_packets, |
| AVFrame* frame) { |
| int frames_read = frame->nb_samples; |
| if (frames_read < 0) |
| return false; |
| |
| const int channels = frame->channels; |
| if (frame->sample_rate != sample_rate_ || channels != channels_ || |
| frame->format != av_sample_format_) { |
| DLOG(ERROR) << "Unsupported midstream configuration change!" |
| << " Sample Rate: " << frame->sample_rate << " vs " |
| << sample_rate_ << ", Channels: " << channels << " vs " |
| << channels_ << ", Sample Format: " << frame->format << " vs " |
| << av_sample_format_; |
| |
| // This is an unrecoverable error, so bail out. We'll return |
| // whatever we've decoded up to this point. |
| return false; |
| } |
| |
| // AAC decoding doesn't properly trim the last packet in a stream, so if we |
| // have duration information, use it to set the correct length to avoid extra |
| // silence from being output. In the case where we are also discarding some |
| // portion of the packet (as indicated by a negative pts), we further want to |
| // adjust the duration downward by however much exists before zero. |
| if (audio_codec_ == AudioCodec::kAAC && frame->pkt_duration) { |
| const base::TimeDelta pkt_duration = ConvertFromTimeBase( |
| glue_->format_context()->streams[stream_index_]->time_base, |
| frame->pkt_duration + std::min(static_cast<int64_t>(0), frame->pts)); |
| const base::TimeDelta frame_duration = |
| base::Seconds(frames_read / static_cast<double>(sample_rate_)); |
| |
| if (pkt_duration < frame_duration && pkt_duration > base::TimeDelta()) { |
| const int new_frames_read = |
| base::ClampFloor(frames_read * (pkt_duration / frame_duration)); |
| DVLOG(2) << "Shrinking AAC frame from " << frames_read << " to " |
| << new_frames_read << " based on packet duration."; |
| frames_read = new_frames_read; |
| |
| // The above process may delete the entire packet. |
| if (!frames_read) |
| return true; |
| } |
| } |
| |
| // Deinterleave each channel and convert to 32bit floating-point with |
| // nominal range -1.0 -> +1.0. If the output is already in float planar |
| // format, just copy it into the AudioBus. |
| decoded_audio_packets->emplace_back(AudioBus::Create(channels, frames_read)); |
| AudioBus* audio_bus = decoded_audio_packets->back().get(); |
| |
| if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLT) { |
| audio_bus->FromInterleaved<Float32SampleTypeTraits>( |
| reinterpret_cast<float*>(frame->data[0]), frames_read); |
| } else if (codec_context_->sample_fmt == AV_SAMPLE_FMT_FLTP) { |
| for (int ch = 0; ch < audio_bus->channels(); ++ch) { |
| memcpy(audio_bus->channel(ch), frame->extended_data[ch], |
| sizeof(float) * frames_read); |
| } |
| } else { |
| int bytes_per_sample = av_get_bytes_per_sample(codec_context_->sample_fmt); |
| switch (bytes_per_sample) { |
| case 1: |
| audio_bus->FromInterleaved<UnsignedInt8SampleTypeTraits>( |
| reinterpret_cast<const uint8_t*>(frame->data[0]), frames_read); |
| break; |
| case 2: |
| audio_bus->FromInterleaved<SignedInt16SampleTypeTraits>( |
| reinterpret_cast<const int16_t*>(frame->data[0]), frames_read); |
| break; |
| case 4: |
| audio_bus->FromInterleaved<SignedInt32SampleTypeTraits>( |
| reinterpret_cast<const int32_t*>(frame->data[0]), frames_read); |
| break; |
| default: |
| NOTREACHED() << "Unsupported bytes per sample encountered: " |
| << bytes_per_sample; |
| audio_bus->ZeroFrames(frames_read); |
| } |
| } |
| |
| (*total_frames) += frames_read; |
| return true; |
| } |
| |
| bool AudioFileReader::SeekForTesting(base::TimeDelta seek_time) { |
| // Use the AVStream's time_base, since |codec_context_| does not have |
| // time_base populated until after OpenDecoder(). |
| return av_seek_frame( |
| glue_->format_context(), stream_index_, |
| ConvertToTimeBase(GetAVStreamForTesting()->time_base, seek_time), |
| AVSEEK_FLAG_BACKWARD) >= 0; |
| } |
| |
| const AVStream* AudioFileReader::GetAVStreamForTesting() const { |
| return glue_->format_context()->streams[stream_index_]; |
| } |
| |
| } // namespace media |