| /* |
| * Copyright 2012 Google Inc. All Rights Reserved. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
#include "media/filters/shell_flv_parser.h"

#include <inttypes.h>
#include <string.h>
#include <limits>

#include "base/stringprintf.h"
#include "media/base/endian_util.h"
| |
| namespace media { |
| |
| // "FLV" as hex ASCII codes |
| static const uint32 kFLV = 0x00464c56; |
| |
| // FLV configuration, such as the AVCConfigRecord and the AudioSpecificConfig, |
| // should proceed any actual encoded data, and should be in the top of the file. |
| // This constant describes how far into the file we're willing to traverse |
| // without encountering metadata or encoded video keyframe data before giving |
| // up. |
| static const uint64 kMetadataMaxBytes = 4 * 1024 * 1024; |
| |
| static const uint8 kAudioTagType = 8; |
| static const uint8 kVideoTagType = 9; |
| static const uint8 kScriptDataObjectTagType = 18; |
| |
| // size of standard FLV tag |
| static const int kTagSize = 11; |
| // To limit reads we download a bit of extra data in flash headers to make |
| // sure we get all of the tag information we might need, allowing us |
| // to download the actual encoded data directly into a decoder buffer with |
| // proper alignment on a subsequent read. This value is calculated as the sum of |
| // the FLV tag size, the VIDEODATA tag size, and the AVCVIDEOPACKET tag size, |
| // which is the maximum amount of tag data that we will need to parse before |
| // download encoded A/V data. |
| static const int kTagDownloadSize = kTagSize + 1 + 4; |
| // these constants describe total tag sizes for AAC/AVC addendums to tags |
| static const int kAudioTagSize = 2; |
| static const int kVideoTagSize = 5; |
| |
| // FLV AUDIODATA tag constants |
| static const uint8 kSoundFormatTypeAAC = 10; |
| // FLV AACAUDIODATA tag constants |
| static const uint8 kAACPacketTypeSequence = 0; |
| static const uint8 kAACPacketTypeRaw = 1; |
| // FLV VIDEODATA tag constants |
| static const uint8 kCodecIDAVC = 7; |
| // FLV AVCVIDEODATA tag constants |
| static const uint8 kAVCPacketTypeSequenceHeader = 0; |
| static const uint8 kAVCPacketTypeNALU = 1; |
| // Unused: |
| // static const uint8 kAVCPacketTypeEndOfSequence = 2; |
| |
| // SCRIPTDATA parsing constants |
| static const uint8 kAMF0NumberType = 0x00; |
| static const int kAMF0NumberLength = 9; |
| |
| // static |
| PipelineStatus ShellFLVParser::Construct( |
| scoped_refptr<ShellDataSourceReader> reader, |
| const uint8* construction_header, |
| scoped_refptr<ShellParser>* parser) { |
| DCHECK(parser); |
| *parser = NULL; |
| |
| // look for "FLV" string at top of file, mask off LSB |
| uint32 FLV = endian_util::load_uint32_big_endian(construction_header) >> 8; |
| if (FLV != kFLV) { |
| // Not an flv. |
| return DEMUXER_ERROR_COULD_NOT_PARSE; |
| } |
| // Check for availability of both an audio and video stream. Audio stream is |
| // third bit, video stream is first bit in 5th byte of file header |
| if ((construction_header[4] & 0x05) != 0x05) { |
| return DEMUXER_ERROR_NO_SUPPORTED_STREAMS; |
| } |
| // offset of first data tag in stream is next 4 bytes |
| uint32 data_offset = |
| endian_util::load_uint32_big_endian(construction_header + 5); |
| // add four bytes to skip over PreviousTagSize0 |
| data_offset += 4; |
| |
| // construct an FLV parser |
| *parser = new ShellFLVParser(reader, data_offset); |
| return PIPELINE_OK; |
| } |
| |
| ShellFLVParser::ShellFLVParser(scoped_refptr<ShellDataSourceReader> reader, |
| uint32 tag_start_offset) |
| : ShellAVCParser(reader), |
| tag_offset_(tag_start_offset), |
| at_end_of_file_(false) {} |
| |
| ShellFLVParser::~ShellFLVParser() {} |
| |
| bool ShellFLVParser::ParseConfig() { |
| // traverse file until we either reach the limit of bytes we're willing to |
| // parse of config info or we've encountered actual keyframe video data. |
| while (tag_offset_ < kMetadataMaxBytes && time_to_byte_map_.size() == 0) { |
| if (!ParseNextTag()) { |
| return false; |
| } |
| } |
| |
| if (duration_.InMilliseconds() == 0) { |
| return false; |
| } |
| |
| // We may have a valid duration by now and the reader may know the |
| // length of the file in bytes, see if we can extrapolate a bitrate from |
| // this. |
| if (duration_ != kInfiniteDuration() && reader_->FileSize() > 0) { |
| bits_per_second_ = (uint32)( |
| ((reader_->FileSize() * 8000ULL) / (duration_.InMilliseconds()))); |
| } |
| |
| return true; |
| } |
| |
| scoped_refptr<ShellAU> ShellFLVParser::GetNextAU(DemuxerStream::Type type) { |
| if (type == DemuxerStream::AUDIO) { |
| return GetNextAudioAU(); |
| } else if (type == DemuxerStream::VIDEO) { |
| return GetNextVideoAU(); |
| } else { |
| NOTREACHED(); |
| } |
| return NULL; |
| } |
| |
| // seeking an flv: |
| // 1) finding nearest video keyframe before timestamp: |
| // a) If we are seeking in to an area we have already parsed then we |
| // will find the bounding keyframe. |
| // b) If not, we parse the FLV until a) is true. |
| // 2) set tag_offset_ to the byte offset of the keyframe found in 1) |
| bool ShellFLVParser::SeekTo(base::TimeDelta timestamp) { |
| // convert timestamp to millisecond FLV timestamp |
| uint32 timestamp_flv = (uint32)timestamp.InMilliseconds(); |
| bool found_upper_bound = false; |
| uint64 seek_byte_offset = tag_offset_; |
| uint32 seek_timestamp = 0; |
| // upper_bound returns iterator of first element in container with key > arg |
| TimeToByteMap::iterator keyframe_in_map = |
| time_to_byte_map_.upper_bound(timestamp_flv); |
| // this is case 1a), or keyframe is last keyframe before EOS, |
| // or map is empty (error state) |
| if (keyframe_in_map == time_to_byte_map_.end()) { |
| // is map empty? This is an error case, we should always have found a |
| // keyframe during ParseConfig() |
| if (time_to_byte_map_.size() == 0) { |
| NOTREACHED() << "empty time to byte map on FLV seek"; |
| return false; |
| } else { |
| // start at last keyframe in the map and parse from there |
| seek_byte_offset = time_to_byte_map_.rbegin()->second; |
| } |
| } else { |
| found_upper_bound = true; |
| // it's possible timestamp <= first keyframe in map, in which case we |
| // use the first keyframe in map. |
| if (keyframe_in_map != time_to_byte_map_.begin()) { |
| keyframe_in_map--; |
| } |
| seek_byte_offset = keyframe_in_map->second; |
| seek_timestamp = keyframe_in_map->first; |
| } |
| // if seek has changed our position in the file jump there now |
| if (seek_byte_offset != tag_offset_) { |
| JumpParserTo(seek_byte_offset); |
| } |
| // if found_upper_bound is still false we are in case 1b), parse ahead until |
| // we encounter an upper bound or an eof |
| while (!found_upper_bound && !at_end_of_file_) { |
| // save highest keyframe in file in case it becomes the one we want |
| seek_byte_offset = time_to_byte_map_.rbegin()->second; |
| seek_timestamp = time_to_byte_map_.rbegin()->first; |
| // parse next tag in the file |
| if (!ParseNextTag()) { |
| return false; |
| } |
| // check last keyframe timestamp, if it's greater than our target timestamp |
| // we can stop |
| found_upper_bound = (time_to_byte_map_.rbegin()->first > timestamp_flv); |
| } |
| // make sure we have done step 2), jump parser to new keyframe |
| if (seek_byte_offset != tag_offset_) { |
| JumpParserTo(seek_byte_offset); |
| } |
| DLOG(INFO) << base::StringPrintf("flv parser seeking to timestamp: %" PRId64 |
| " chose keyframe at %d", |
| timestamp.InMilliseconds(), seek_timestamp); |
| return true; |
| } |
| |
| scoped_refptr<ShellAU> ShellFLVParser::GetNextAudioAU() { |
| // As audio timestamps are supposed to increase monotonically we need |
| // only 2 to calculate a duration. |
| while (next_audio_aus_.size() < 2) { |
| if (!ParseNextTag()) { |
| return NULL; |
| } |
| } |
| // There should always be 2 AUs in the queue now, even if they are both EOS. |
| DCHECK_GE(next_audio_aus_.size(), 2); |
| |
| // Extract first AU in queue |
| scoped_refptr<ShellAU> au(next_audio_aus_.front()); |
| next_audio_aus_.pop_front(); |
| // Next timestamp should be greater than ours, if not something is very funny |
| // with this FLV and we won't be able to calculate duration. |
| if (next_audio_aus_.front()->GetTimestamp() >= au->GetTimestamp()) { |
| au->SetDuration(next_audio_aus_.front()->GetTimestamp() - |
| au->GetTimestamp()); |
| } else { |
| DLOG(ERROR) << "out of order audio timestamps encountered on FLV parsing."; |
| } |
| return au; |
| } |
| |
| scoped_refptr<ShellAU> ShellFLVParser::GetNextVideoAU() { |
| while (next_video_aus_.empty()) { |
| if (!ParseNextTag()) { |
| return NULL; |
| } |
| } |
| // extract next video AU |
| scoped_refptr<ShellAU> au(next_video_aus_.front()); |
| next_video_aus_.pop_front(); |
| |
| return au; |
| } |
| |
| // |
| // byte layout of an FLVTAG is: |
| // field | type | comment |
| // ------------------+--------+--------- |
| // previous tag size | uint32 | we skip past these when parsing last tag |
| // tag type | uint8 | parsing starts here. describes tag type |
| // tag data size | uint24 | size of tag data payload (everything after this) |
| // timestamp | uint24 | lower 24 bits of timestamp in milliseconds |
| // timestamp ext | uint8 | upper 8 bits of timestamp in milliseconds |
| // stream id | uint24 | always 0 |
| // |
| bool ShellFLVParser::ParseNextTag() { |
| uint8 tag_buffer[kTagDownloadSize]; |
| |
| if (at_end_of_file_) { |
| return false; |
| } |
| |
| // get previous tag size and header for next one |
| int bytes_read = |
| reader_->BlockingRead(tag_offset_, kTagDownloadSize, tag_buffer); |
| |
| // if that was the last tag in the stream detect the EOS and return. This |
| // is where normal termination of an FLV stream will occur. |
| if (bytes_read < kTagDownloadSize) { |
| at_end_of_file_ = true; |
| // Normal termination of an FLV. Enqueue EOS AUs in both streams. |
| next_video_aus_.push_back(ShellAU::CreateEndOfStreamAU( |
| DemuxerStream::VIDEO, video_track_duration_)); |
| next_audio_aus_.push_back(ShellAU::CreateEndOfStreamAU( |
| DemuxerStream::AUDIO, audio_track_duration_)); |
| return true; |
| } |
| |
| // extract the tag data size from the tag header as uint24 |
| // this is size of attached data field only not including this header |
| // but including the audio and video sub-headers |
| uint32 tag_data_size = |
| endian_util::load_uint32_big_endian(tag_buffer + 1) >> 8; |
| |
| // extract timestamp, wonky byte order comes from the standard |
| int32 timestamp = tag_buffer[4] << 16 | tag_buffer[5] << 8 | tag_buffer[6] | |
| tag_buffer[7] << 24; |
| |
| // choose which tag type to parse |
| bool parse_result = true; |
| uint8* tag_body = tag_buffer + kTagSize; |
| switch (tag_buffer[0]) { |
| case kAudioTagType: |
| parse_result = ParseAudioDataTag(tag_body, tag_data_size, timestamp); |
| break; |
| |
| case kVideoTagType: |
| parse_result = ParseVideoDataTag(tag_body, tag_data_size, timestamp); |
| break; |
| |
| case kScriptDataObjectTagType: |
| parse_result = |
| ParseScriptDataObjectTag(tag_body, tag_data_size, timestamp); |
| break; |
| |
| default: |
| DLOG(WARNING) << base::StringPrintf("unsupported FLV TagType %d", |
| tag_buffer[0]); |
| break; |
| } |
| |
| // advance read pointer to next tag header |
| tag_offset_ += kTagSize + tag_data_size + 4; |
| return parse_result; |
| } |
| |
| // FLV AUDIODATA tags are packed into a single byte, bit layout is: |
| // aaaabbcd |
| // aaaa: 4 bits format enum, AAC is 10 decimal |
| // bb: 2 bits sample rate enum, AAC is always 3 decimal (44 KHz) |
| // c: 1 bit sound size, 0 means 8 bit, 1 means 16 bit, AAC is always 1 |
| // d: 1 bit sound type, 0 means mono, 1 means stereo, AAC is always 1 |
| // if this is an AACAUDIODATA tag the next byte in the sequence 0 if |
| // this is an AudioSpecificConfig tag or 1 if it is raw AAC frame data. |
| // |
| // * NOTE that FLV standard defines fixed values for sample rate, bit |
| // width, and channel count for AAC samples but the AudioSpecificConfig may |
| // define those values differently and is authoritative, so we ignore the |
| // FLV-provided config values. |
| bool ShellFLVParser::ParseAudioDataTag(uint8* tag, |
| uint32 size, |
| uint32 timestamp) { |
| // Smallest meaningful size for an audio data tag is 4 bytes, one for the |
| // AUDIODATA tag, one for the AACAUDIODATA tag. and minimum 2 bytes of data. |
| if (size < kAudioTagSize + 2) { |
| return false; |
| } |
| // we only support parsing AAC audio data tags |
| if (((tag[0] >> 4) & 0x0f) != kSoundFormatTypeAAC) { |
| return false; |
| } |
| // now see if this is a config packet or a data packet |
| if (tag[1] == kAACPacketTypeSequence) { // audio config info |
| // AudioSpecificConfig records can be longer than two bytes but we extract |
| // everything we need from the first two bytes, positioned here at index 2 |
| // and 3 in the tag buffer |
| ParseAudioSpecificConfig(tag[2], tag[3]); |
| } else if (tag[1] == kAACPacketTypeRaw) { // raw AAC audio |
| // this is audio data, check timestamp |
| base::TimeDelta ts = base::TimeDelta::FromMilliseconds(timestamp); |
| if (ts > audio_track_duration_) { |
| audio_track_duration_ = ts; |
| } |
| // build the AU |
| size_t prepend_size = CalculatePrependSize(DemuxerStream::AUDIO, true); |
| scoped_refptr<ShellAU> au = ShellAU::CreateAudioAU( |
| tag_offset_ + kTagSize + kAudioTagSize, size - kAudioTagSize, |
| prepend_size, true, ts, kInfiniteDuration(), this); |
| next_audio_aus_.push_back(au); |
| } |
| |
| return true; |
| } |
| |
| // FLV VIDEODATA tags are packed into a single byte, bit layout is: |
| // aaaabbbb |
| // aaaa: 4 bits frame type enum, 1 is AVC keyframe, 2 is AVC inter-frame |
| // bbbb: 4 bits codecID, 7 is AVC |
| // if this is an AVCVIDEOPACKET tag the next 4 bytes comprise the AVCVIDEOPACKET |
| // tag header: |
| // field | type | comment |
| // ------------------+--------+--------- |
| // AVCPacketType | uint8 | 0 is config, 1 is data, 2 is EOS (ignored) |
| // CompositionTime | int24 | signed time offset, add to FLV timestamp for pts |
| // |
| // NOTE that FLV video data is always presented in decode order and |
| // CompositionTime is not entirely reliable for determining pts, as some |
| // encoders always set it to zero. |
| bool ShellFLVParser::ParseVideoDataTag(uint8* tag, |
| uint32 size, |
| uint32 timestamp) { |
| // need at least 5 bytes of tag data, one for the VIDEODATA tag and 4 for |
| // the AVCVIDEODATA tag that should always follow it |
| if (size < kVideoTagSize) { |
| return false; |
| } |
| // check for AVC format |
| if ((tag[0] & 0x0f) != kCodecIDAVC) { |
| return false; |
| } |
| // determine packet type |
| if (tag[1] == kAVCPacketTypeSequenceHeader) { // video config info |
| // AVC config record, download and parse |
| return DownloadAndParseAVCConfigRecord( |
| tag_offset_ + kTagSize + kVideoTagSize, size); |
| } else if (tag[1] == kAVCPacketTypeNALU) { // raw AVC data |
| // should we add this to our keyframe map? |
| bool is_keyframe = (tag[0] & 0xf0) == 0x10; |
| // TODO: when we add support for seeking, make sure these numbers are |
| // consistent with the numbers provided by the time-to-byte manifest. |
| if (is_keyframe) { |
| time_to_byte_map_[timestamp] = tag_offset_; |
| } |
| // extract 24-bit composition time offset in big-endian for this frame |
| int32 composition_time_offset = tag[2] * 65536 + tag[3] * 256 + tag[4]; |
| // calculate pts from flv timestamp and cts |
| uint32 pts = timestamp + composition_time_offset; |
| // FLV standard says that there can be multiple AVC NALUs packed here, so |
| // we iterate through the tag data payload and enqueue byte offsets for |
| // each NALU we encounter. The NALUs are packed by size counter that is |
| // nal_header_size_ bytes long followed by the NALU of that size. |
| uint32 avc_data_size = size - kVideoTagSize; |
| uint32 avc_tag_offset = 0; |
| base::TimeDelta ts = base::TimeDelta::FromMilliseconds(pts); |
| if (ts > video_track_duration_) { |
| video_track_duration_ = ts; |
| } |
| |
| size_t prepend_size = |
| CalculatePrependSize(DemuxerStream::VIDEO, is_keyframe); |
| scoped_refptr<ShellAU> au = ShellAU::CreateVideoAU( |
| tag_offset_ + kTagSize + kVideoTagSize + avc_tag_offset, avc_data_size, |
| prepend_size, nal_header_size_, is_keyframe, ts, kInfiniteDuration(), |
| this); |
| // enqueue data tag |
| next_video_aus_.push_back(au); |
| } |
| return true; |
| } |
| |
| // FLV SCRIPTDATA tags are in serialized typically in Action Message Format 0 as |
| // a collection of key/value pairs terminated by a special code. We only wish to |
| // parse the duration and byterate from the scriptdata so we use a very light |
| // weight parser in ExtractAMF0Number(); |
| bool ShellFLVParser::ParseScriptDataObjectTag(uint8* tag, |
| uint32 size, |
| uint32 timestamp) { |
| scoped_refptr<ShellScopedArray> script_buffer = |
| ShellBufferFactory::Instance()->AllocateArray(size); |
| if (!script_buffer || !script_buffer->Get()) { |
| return false; |
| } |
| int bytes_read = |
| reader_->BlockingRead(tag_offset_ + kTagSize, size, script_buffer->Get()); |
| DCHECK_LE(size, static_cast<uint32>(std::numeric_limits<int32>::max())); |
| if (bytes_read < static_cast<int>(size)) { |
| return false; |
| } |
| // Attempt to extract the duration from the FLV metadata. |
| double duration_seconds = 0; |
| if (!ExtractAMF0Number(script_buffer, "duration", &duration_seconds)) { |
| // might be worth trying to parse this as AMF3? |
| return false; |
| } |
| duration_ = base::TimeDelta::FromMicroseconds( |
| duration_seconds * base::Time::kMicrosecondsPerSecond); |
| |
| // Try for the byterate too, but this is nonfatal if we can't get it. |
| double byterate = 0; |
| if (ExtractAMF0Number(script_buffer, "totaldatarate", &byterate)) { |
| bits_per_second_ = (uint32)(byterate * 8.0); |
| } |
| |
| return true; |
| } |
| |
| // The SCRIPTDATA tag contains a list of ordered pairs of AMF0 strings followed |
| // by an arbitrary AMF0 object. Typically there's one object of interest with |
| // string name 'onMetaData' followed by an anonymous object. In any event we |
| // will scan the buffer looking only for the provided string, verify the next |
| // bytes in the stream describe an AMF0 number, extract and return it. |
| // TODO: Replace this (brittle) code with a proper AMF0 parser. |
| bool ShellFLVParser::ExtractAMF0Number(scoped_refptr<ShellScopedArray> amf0, |
| const char* name, |
| double* number_out) { |
| DCHECK(number_out); |
| // the string will be proceeded by a u16 big-endian string length |
| uint16 name_length = strlen(name); |
| // there's lots of nonprinting characters and zeros in amf0, so we'll need |
| // to search for the string using our own method |
| int match_offset = 0; |
| int name_offset = 0; |
| // the last index in the buffer we could extract a string followed by Number |
| int search_length = amf0->Size() - (name_length + kAMF0NumberLength); |
| uint8* search_buffer = amf0->Get(); |
| while (match_offset <= search_length && name_offset < name_length) { |
| if (search_buffer[match_offset] == name[name_offset]) { |
| name_offset++; // advance our substring pointer in the event of a match |
| } else { |
| name_offset = 0; // reset our substring pointer on a miss |
| } |
| match_offset++; // always advance our larger string pointer |
| } |
| // If we got a match name_offset will be pointing past the end of the search |
| // string and match_offset will be pointing to valid memory with room to |
| // extract a Number |
| if ((name_offset == name_length) && |
| (match_offset <= amf0->Size() - kAMF0NumberLength)) { |
| // make sure the first byte matches the number type code |
| if (search_buffer[match_offset] != kAMF0NumberType) { |
| return false; |
| } |
| // advance pointer past the number type to the number itself |
| match_offset++; |
| // load big-endian double as uint, then cast to correct type |
| uint64 num_as_uint = |
| endian_util::load_uint64_big_endian(search_buffer + match_offset); |
| *number_out = *((double*)(&num_as_uint)); |
| return true; |
| } |
| return false; |
| } |
| |
| void ShellFLVParser::JumpParserTo(uint64 byte_offset) { |
| next_video_aus_.clear(); |
| next_audio_aus_.clear(); |
| at_end_of_file_ = false; |
| tag_offset_ = byte_offset; |
| } |
| |
| } // namespace media |