// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "media/ffmpeg/ffmpeg_common.h"
#include "base/basictypes.h"
#include "base/logging.h"
#include "base/metrics/histogram.h"
#include "base/strings/string_number_conversions.h"
#include "media/base/decoder_buffer.h"
#include "media/base/video_frame.h"
#include "media/base/video_util.h"
namespace media {
// Why FF_INPUT_BUFFER_PADDING_SIZE? FFmpeg assumes all input buffers are
// padded. Check here to ensure FFmpeg only receives data padded to its
// specifications.
COMPILE_ASSERT(DecoderBuffer::kPaddingSize >= FF_INPUT_BUFFER_PADDING_SIZE,
decoder_buffer_padding_size_does_not_fit_ffmpeg_requirement);
// Alignment requirement by FFmpeg for input and output buffers. This needs to
// be updated to match FFmpeg whenever FFmpeg changes it.
#if defined(ARCH_CPU_ARM_FAMILY)
static const int kFFmpegBufferAddressAlignment = 16;
#else
static const int kFFmpegBufferAddressAlignment = 32;
#endif
// Check here to ensure FFmpeg only receives data aligned to its specifications:
// DecoderBuffer's alignment must be at least FFmpeg's alignment and a whole
// multiple of it.
COMPILE_ASSERT(
DecoderBuffer::kAlignmentSize >= kFFmpegBufferAddressAlignment &&
DecoderBuffer::kAlignmentSize % kFFmpegBufferAddressAlignment == 0,
decoder_buffer_alignment_size_does_not_fit_ffmpeg_requirement);
// Allows faster SIMD YUV convert. Also, FFmpeg overreads/-writes occasionally.
// See video_get_buffer() in libavcodec/utils.c.
static const int kFFmpegOutputBufferPaddingSize = 16;
// VideoFrame must pad its buffers by at least the amount required above.
COMPILE_ASSERT(VideoFrame::kFrameSizePadding >= kFFmpegOutputBufferPaddingSize,
video_frame_padding_size_does_not_fit_ffmpeg_requirement);
// VideoFrame's address alignment must be at least FFmpeg's alignment and a
// whole multiple of it.
COMPILE_ASSERT(
VideoFrame::kFrameAddressAlignment >= kFFmpegBufferAddressAlignment &&
VideoFrame::kFrameAddressAlignment % kFFmpegBufferAddressAlignment == 0,
video_frame_address_alignment_does_not_fit_ffmpeg_requirement);
// Time base of 1 / base::Time::kMicrosecondsPerSecond, i.e. one microsecond.
// Used as the other side of av_rescale_q() when converting timestamps to and
// from base::TimeDelta.
static const AVRational kMicrosBase = { 1, base::Time::kMicrosecondsPerSecond };
// Rescales |timestamp|, expressed in units of |time_base|, into microseconds
// and wraps the result in a base::TimeDelta.
base::TimeDelta ConvertFromTimeBase(const AVRational& time_base,
                                    int64 timestamp) {
  return base::TimeDelta::FromMicroseconds(
      av_rescale_q(timestamp, time_base, kMicrosBase));
}
// Rescales |timestamp| from microseconds into units of |time_base| and
// returns the resulting tick count.
int64 ConvertToTimeBase(const AVRational& time_base,
                        const base::TimeDelta& timestamp) {
  const int64 microseconds = timestamp.InMicroseconds();
  return av_rescale_q(microseconds, kMicrosBase, time_base);
}
// Maps an FFmpeg audio codec ID onto the corresponding AudioCodec value.
// IDs without a mapping are logged via DVLOG(1) and reported as
// kUnknownAudioCodec.
static AudioCodec CodecIDToAudioCodec(AVCodecID codec_id) {
  AudioCodec audio_codec = kUnknownAudioCodec;
  switch (codec_id) {
    case AV_CODEC_ID_AAC:
      audio_codec = kCodecAAC;
      break;
    case AV_CODEC_ID_MP3:
      audio_codec = kCodecMP3;
      break;
    case AV_CODEC_ID_VORBIS:
      audio_codec = kCodecVorbis;
      break;
    // These PCM variants all share the generic kCodecPCM value.
    case AV_CODEC_ID_PCM_U8:
    case AV_CODEC_ID_PCM_S16LE:
    case AV_CODEC_ID_PCM_S24LE:
    case AV_CODEC_ID_PCM_F32LE:
      audio_codec = kCodecPCM;
      break;
    case AV_CODEC_ID_PCM_S16BE:
      audio_codec = kCodecPCM_S16BE;
      break;
    case AV_CODEC_ID_PCM_S24BE:
      audio_codec = kCodecPCM_S24BE;
      break;
    case AV_CODEC_ID_FLAC:
      audio_codec = kCodecFLAC;
      break;
    case AV_CODEC_ID_AMR_NB:
      audio_codec = kCodecAMR_NB;
      break;
    case AV_CODEC_ID_AMR_WB:
      audio_codec = kCodecAMR_WB;
      break;
    case AV_CODEC_ID_GSM_MS:
      audio_codec = kCodecGSM_MS;
      break;
    case AV_CODEC_ID_PCM_ALAW:
      audio_codec = kCodecPCM_ALAW;
      break;
    case AV_CODEC_ID_PCM_MULAW:
      audio_codec = kCodecPCM_MULAW;
      break;
    case AV_CODEC_ID_OPUS:
      audio_codec = kCodecOpus;
      break;
    default:
      DVLOG(1) << "Unknown audio CodecID: " << codec_id;
      break;
  }
  return audio_codec;
}
// Selects the FFmpeg codec ID used for generic kCodecPCM audio with the given
// |sample_format|. Formats without a mapping are logged via DVLOG(1) and
// yield AV_CODEC_ID_NONE.
static AVCodecID PCMSampleFormatToCodecID(SampleFormat sample_format) {
  switch (sample_format) {
    case kSampleFormatU8:
      return AV_CODEC_ID_PCM_U8;
    case kSampleFormatS16:
      return AV_CODEC_ID_PCM_S16LE;
    case kSampleFormatS32:
      return AV_CODEC_ID_PCM_S24LE;
    case kSampleFormatF32:
      return AV_CODEC_ID_PCM_F32LE;
    default:
      break;
  }
  DVLOG(1) << "Unsupported sample format: " << sample_format;
  return AV_CODEC_ID_NONE;
}

// Maps an AudioCodec onto the FFmpeg codec ID. |sample_format| is consulted
// only for kCodecPCM, where it picks the concrete PCM variant. Codecs without
// a mapping are logged via DVLOG(1) and yield AV_CODEC_ID_NONE.
static AVCodecID AudioCodecToCodecID(AudioCodec audio_codec,
                                     SampleFormat sample_format) {
  switch (audio_codec) {
    case kCodecAAC:
      return AV_CODEC_ID_AAC;
    case kCodecMP3:
      return AV_CODEC_ID_MP3;
    case kCodecPCM:
      return PCMSampleFormatToCodecID(sample_format);
    case kCodecPCM_S16BE:
      return AV_CODEC_ID_PCM_S16BE;
    case kCodecPCM_S24BE:
      return AV_CODEC_ID_PCM_S24BE;
    case kCodecVorbis:
      return AV_CODEC_ID_VORBIS;
    case kCodecFLAC:
      return AV_CODEC_ID_FLAC;
    case kCodecAMR_NB:
      return AV_CODEC_ID_AMR_NB;
    case kCodecAMR_WB:
      return AV_CODEC_ID_AMR_WB;
    case kCodecGSM_MS:
      return AV_CODEC_ID_GSM_MS;
    case kCodecPCM_ALAW:
      return AV_CODEC_ID_PCM_ALAW;
    case kCodecPCM_MULAW:
      return AV_CODEC_ID_PCM_MULAW;
    case kCodecOpus:
      return AV_CODEC_ID_OPUS;
    default:
      break;
  }
  DVLOG(1) << "Unknown AudioCodec: " << audio_codec;
  return AV_CODEC_ID_NONE;
}
// Maps an FFmpeg video codec ID onto the corresponding VideoCodec value.
// IDs without a mapping are logged via DVLOG(1) and reported as
// kUnknownVideoCodec.
static VideoCodec CodecIDToVideoCodec(AVCodecID codec_id) {
  switch (codec_id) {
    case AV_CODEC_ID_H264:
      return kCodecH264;
    case AV_CODEC_ID_THEORA:
      return kCodecTheora;
    case AV_CODEC_ID_MPEG4:
      return kCodecMPEG4;
    case AV_CODEC_ID_VP8:
      return kCodecVP8;
    case AV_CODEC_ID_VP9:
      return kCodecVP9;
    default:
      break;
  }
  DVLOG(1) << "Unknown video CodecID: " << codec_id;
  return kUnknownVideoCodec;
}
// Maps a VideoCodec onto the corresponding FFmpeg codec ID. Codecs without a
// mapping are logged via DVLOG(1) and yield AV_CODEC_ID_NONE.
static AVCodecID VideoCodecToCodecID(VideoCodec video_codec) {
  AVCodecID codec_id = AV_CODEC_ID_NONE;
  switch (video_codec) {
    case kCodecH264:
      codec_id = AV_CODEC_ID_H264;
      break;
    case kCodecTheora:
      codec_id = AV_CODEC_ID_THEORA;
      break;
    case kCodecMPEG4:
      codec_id = AV_CODEC_ID_MPEG4;
      break;
    case kCodecVP8:
      codec_id = AV_CODEC_ID_VP8;
      break;
    case kCodecVP9:
      codec_id = AV_CODEC_ID_VP9;
      break;
    default:
      DVLOG(1) << "Unknown VideoCodec: " << video_codec;
      break;
  }
  return codec_id;
}
// Maps an FFmpeg H.264 profile ID onto the corresponding VideoCodecProfile.
// Profile IDs without a mapping are logged via DVLOG(1) and yield
// VIDEO_CODEC_PROFILE_UNKNOWN.
static VideoCodecProfile ProfileIDToVideoCodecProfile(int profile) {
  // The CONSTRAINED & INTRA flags denote strict subsets of the profiles they
  // are combined with, so strip them before looking up the base profile.
  const int subset_flags = FF_PROFILE_H264_CONSTRAINED | FF_PROFILE_H264_INTRA;
  const int base_profile = profile & ~subset_flags;

  VideoCodecProfile result = VIDEO_CODEC_PROFILE_UNKNOWN;
  switch (base_profile) {
    case FF_PROFILE_H264_BASELINE:
      result = H264PROFILE_BASELINE;
      break;
    case FF_PROFILE_H264_MAIN:
      result = H264PROFILE_MAIN;
      break;
    case FF_PROFILE_H264_EXTENDED:
      result = H264PROFILE_EXTENDED;
      break;
    case FF_PROFILE_H264_HIGH:
      result = H264PROFILE_HIGH;
      break;
    case FF_PROFILE_H264_HIGH_10:
      result = H264PROFILE_HIGH10PROFILE;
      break;
    case FF_PROFILE_H264_HIGH_422:
      result = H264PROFILE_HIGH422PROFILE;
      break;
    case FF_PROFILE_H264_HIGH_444_PREDICTIVE:
      result = H264PROFILE_HIGH444PREDICTIVEPROFILE;
      break;
    default:
      DVLOG(1) << "Unknown profile id: " << base_profile;
      break;
  }
  return result;
}
// Maps an H.264 VideoCodecProfile onto the corresponding FFmpeg profile ID.
// Profiles without a mapping are logged via DVLOG(1) and yield
// FF_PROFILE_UNKNOWN.
static int VideoCodecProfileToProfileID(VideoCodecProfile profile) {
  switch (profile) {
    case H264PROFILE_BASELINE:
      return FF_PROFILE_H264_BASELINE;
    case H264PROFILE_MAIN:
      return FF_PROFILE_H264_MAIN;
    case H264PROFILE_EXTENDED:
      return FF_PROFILE_H264_EXTENDED;
    case H264PROFILE_HIGH:
      return FF_PROFILE_H264_HIGH;
    case H264PROFILE_HIGH10PROFILE:
      return FF_PROFILE_H264_HIGH_10;
    case H264PROFILE_HIGH422PROFILE:
      return FF_PROFILE_H264_HIGH_422;
    case H264PROFILE_HIGH444PREDICTIVEPROFILE:
      return FF_PROFILE_H264_HIGH_444_PREDICTIVE;
    default:
      break;
  }
  DVLOG(1) << "Unknown VideoCodecProfile: " << profile;
  return FF_PROFILE_UNKNOWN;
}
// Maps an FFmpeg AVSampleFormat onto the corresponding SampleFormat. Formats
// without a mapping are logged via DVLOG(1) and yield kUnknownSampleFormat.
SampleFormat AVSampleFormatToSampleFormat(AVSampleFormat sample_format) {
  SampleFormat result = kUnknownSampleFormat;
  switch (sample_format) {
    case AV_SAMPLE_FMT_U8:
      result = kSampleFormatU8;
      break;
    case AV_SAMPLE_FMT_S16:
      result = kSampleFormatS16;
      break;
    case AV_SAMPLE_FMT_S32:
      result = kSampleFormatS32;
      break;
    case AV_SAMPLE_FMT_FLT:
      result = kSampleFormatF32;
      break;
    case AV_SAMPLE_FMT_S16P:
      result = kSampleFormatPlanarS16;
      break;
    case AV_SAMPLE_FMT_FLTP:
      result = kSampleFormatPlanarF32;
      break;
    default:
      DVLOG(1) << "Unknown AVSampleFormat: " << sample_format;
      break;
  }
  return result;
}
// Maps a SampleFormat onto the corresponding FFmpeg AVSampleFormat. Formats
// without a mapping are logged via DVLOG(1) and yield AV_SAMPLE_FMT_NONE.
static AVSampleFormat SampleFormatToAVSampleFormat(SampleFormat sample_format) {
  switch (sample_format) {
    case kSampleFormatU8:
      return AV_SAMPLE_FMT_U8;
    case kSampleFormatS16:
      return AV_SAMPLE_FMT_S16;
    case kSampleFormatS32:
      return AV_SAMPLE_FMT_S32;
    case kSampleFormatF32:
      return AV_SAMPLE_FMT_FLT;
    case kSampleFormatPlanarS16:
      return AV_SAMPLE_FMT_S16P;
    case kSampleFormatPlanarF32:
      return AV_SAMPLE_FMT_FLTP;
    default:
      break;
  }
  DVLOG(1) << "Unknown SampleFormat: " << sample_format;
  return AV_SAMPLE_FMT_NONE;
}
static void AVCodecContextToAudioDecoderConfig(
const AVCodecContext* codec_context,
bool is_encrypted,
AudioDecoderConfig* config,
bool record_stats) {
DCHECK_EQ(codec_context->codec_type, AVMEDIA_TYPE_AUDIO);
AudioCodec codec = CodecIDToAudioCodec(codec_context->codec_id);
SampleFormat sample_format =
AVSampleFormatToSampleFormat(codec_context->sample_fmt);
ChannelLayout channel_layout = ChannelLayoutToChromeChannelLayout(
codec_context->channel_layout, codec_context->channels);
if (codec == kCodecOpus) {
// |codec_context->sample_fmt| is not set by FFmpeg because Opus decoding is
// not enabled in FFmpeg. It doesn't matter what value is set here, so long
// as it's valid, the true sample format is selected inside the decoder.
sample_format = kSampleFormatF32;
}
base::TimeDelta seek_preroll;
if (codec_context->seek_preroll > 0) {
seek_preroll = base::TimeDelta::FromMicroseconds(
codec_context->seek_preroll * 1000000.0 / codec_context->sample_rate);
}
base::TimeDelta codec_delay;
if (codec_context->delay > 0) {
codec_delay = base::TimeDelta::FromMicroseconds(
codec_context->delay * 1000000.0 / codec_context->sample_rate);
}
config->Initialize(codec,
sample_format,
channel_layout,
codec_context->sample_rate,
codec_context->extradata,
codec_context->extradata_size,
is_encrypted,
record_stats,
seek_preroll,
codec_delay);
if (codec != kCodecOpus) {
DCHECK_EQ(av_get_bytes_per_sample(codec_context->sample_fmt) * 8,
config->bits_per_channel());
}
}
// Fills |config| from the codec context attached to |stream|. The stream is
// treated as encrypted when its metadata carries an "enc_key_id" entry.
// |record_stats| is forwarded unchanged.
void AVStreamToAudioDecoderConfig(
    const AVStream* stream,
    AudioDecoderConfig* config,
    bool record_stats) {
  const bool is_encrypted =
      av_dict_get(stream->metadata, "enc_key_id", NULL, 0) != NULL;
  AVCodecContextToAudioDecoderConfig(
      stream->codec, is_encrypted, config, record_stats);
}
void AudioDecoderConfigToAVCodecContext(const AudioDecoderConfig& config,
AVCodecContext* codec_context) {
codec_context->codec_type = AVMEDIA_TYPE_AUDIO;
codec_context->codec_id = AudioCodecToCodecID(config.codec(),
config.sample_format());
codec_context->sample_fmt = SampleFormatToAVSampleFormat(
config.sample_format());
// TODO(scherkus): should we set |channel_layout|? I'm not sure if FFmpeg uses
// said information to decode.
codec_context->channels =
ChannelLayoutToChannelCount(config.channel_layout());
codec_context->sample_rate = config.samples_per_second();
if (config.extra_data()) {
codec_context->extradata_size = config.extra_data_size();
codec_context->extradata = reinterpret_cast<uint8_t*>(
av_malloc(config.extra_data_size() + FF_INPUT_BUFFER_PADDING_SIZE));
memcpy(codec_context->extradata, config.extra_data(),
config.extra_data_size());
memset(codec_context->extradata + config.extra_data_size(), '\0',
FF_INPUT_BUFFER_PADDING_SIZE);
} else {
codec_context->extradata = NULL;
codec_context->extradata_size = 0;
}
}
void AVStreamToVideoDecoderConfig(
const AVStream* stream,
VideoDecoderConfig* config,
bool record_stats) {
gfx::Size coded_size(stream->codec->coded_width, stream->codec->coded_height);
// TODO(vrk): This assumes decoded frame data starts at (0, 0), which is true
// for now, but may not always be true forever. Fix this in the future.
gfx::Rect visible_rect(stream->codec->width, stream->codec->height);
AVRational aspect_ratio = { 1, 1 };
if (stream->sample_aspect_ratio.num)
aspect_ratio = stream->sample_aspect_ratio;
else if (stream->codec->sample_aspect_ratio.num)
aspect_ratio = stream->codec->sample_aspect_ratio;
VideoCodec codec = CodecIDToVideoCodec(stream->codec->codec_id);
VideoCodecProfile profile = VIDEO_CODEC_PROFILE_UNKNOWN;
if (codec == kCodecVP8)
profile = VP8PROFILE_MAIN;
else if (codec == kCodecVP9)
profile = VP9PROFILE_MAIN;
else
profile = ProfileIDToVideoCodecProfile(stream->codec->profile);
gfx::Size natural_size = GetNaturalSize(
visible_rect.size(), aspect_ratio.num, aspect_ratio.den);
if (record_stats) {
UMA_HISTOGRAM_ENUMERATION("Media.VideoColorRange",
stream->codec->color_range,
AVCOL_RANGE_NB);
}
VideoFrame::Format format = PixelFormatToVideoFormat(stream->codec->pix_fmt);
if (codec == kCodecVP9) {
// TODO(tomfinegan): libavcodec doesn't know about VP9.
format = VideoFrame::YV12;
coded_size = natural_size;
}
bool is_encrypted = false;
AVDictionaryEntry* key = av_dict_get(stream->metadata, "enc_key_id", NULL, 0);
if (key)
is_encrypted = true;
AVDictionaryEntry* webm_alpha =
av_dict_get(stream->metadata, "alpha_mode", NULL, 0);
if (webm_alpha && !strcmp(webm_alpha->value, "1")) {
format = VideoFrame::YV12A;
}
config->Initialize(codec,
profile,
format,
coded_size, visible_rect, natural_size,
stream->codec->extradata, stream->codec->extradata_size,
is_encrypted,
record_stats);
}
void VideoDecoderConfigToAVCodecContext(
const VideoDecoderConfig& config,
AVCodecContext* codec_context) {
codec_context->codec_type = AVMEDIA_TYPE_VIDEO;
codec_context->codec_id = VideoCodecToCodecID(config.codec());
codec_context->profile = VideoCodecProfileToProfileID(config.profile());
codec_context->coded_width = config.coded_size().width();
codec_context->coded_height = config.coded_size().height();
codec_context->pix_fmt = VideoFormatToPixelFormat(config.format());
if (config.extra_data()) {
codec_context->extradata_size = config.extra_data_size();
codec_context->extradata = reinterpret_cast<uint8_t*>(
av_malloc(config.extra_data_size() + FF_INPUT_BUFFER_PADDING_SIZE));
memcpy(codec_context->extradata, config.extra_data(),
config.extra_data_size());
memset(codec_context->extradata + config.extra_data_size(), '\0',
FF_INPUT_BUFFER_PADDING_SIZE);
} else {
codec_context->extradata = NULL;
codec_context->extradata_size = 0;
}
}
// Maps an FFmpeg channel layout mask onto the corresponding ChannelLayout.
// When |layout| has no explicit mapping, the result is guessed from
// |channels| via GuessChannelLayout().
ChannelLayout ChannelLayoutToChromeChannelLayout(int64_t layout, int channels) {
  switch (layout) {
    case AV_CH_LAYOUT_MONO:
      return CHANNEL_LAYOUT_MONO;
    case AV_CH_LAYOUT_STEREO:
      return CHANNEL_LAYOUT_STEREO;
    case AV_CH_LAYOUT_2_1:
      return CHANNEL_LAYOUT_2_1;
    case AV_CH_LAYOUT_SURROUND:
      return CHANNEL_LAYOUT_SURROUND;
    case AV_CH_LAYOUT_4POINT0:
      return CHANNEL_LAYOUT_4_0;
    case AV_CH_LAYOUT_2_2:
      return CHANNEL_LAYOUT_2_2;
    case AV_CH_LAYOUT_QUAD:
      return CHANNEL_LAYOUT_QUAD;
    case AV_CH_LAYOUT_5POINT0:
      return CHANNEL_LAYOUT_5_0;
    case AV_CH_LAYOUT_5POINT1:
      return CHANNEL_LAYOUT_5_1;
    case AV_CH_LAYOUT_5POINT0_BACK:
      return CHANNEL_LAYOUT_5_0_BACK;
    case AV_CH_LAYOUT_5POINT1_BACK:
      return CHANNEL_LAYOUT_5_1_BACK;
    case AV_CH_LAYOUT_7POINT0:
      return CHANNEL_LAYOUT_7_0;
    case AV_CH_LAYOUT_7POINT1:
      return CHANNEL_LAYOUT_7_1;
    case AV_CH_LAYOUT_7POINT1_WIDE:
      return CHANNEL_LAYOUT_7_1_WIDE;
    case AV_CH_LAYOUT_STEREO_DOWNMIX:
      return CHANNEL_LAYOUT_STEREO_DOWNMIX;
    case AV_CH_LAYOUT_2POINT1:
      return CHANNEL_LAYOUT_2POINT1;
    case AV_CH_LAYOUT_3POINT1:
      return CHANNEL_LAYOUT_3_1;
    case AV_CH_LAYOUT_4POINT1:
      return CHANNEL_LAYOUT_4_1;
    case AV_CH_LAYOUT_6POINT0:
      return CHANNEL_LAYOUT_6_0;
    case AV_CH_LAYOUT_6POINT0_FRONT:
      return CHANNEL_LAYOUT_6_0_FRONT;
    case AV_CH_LAYOUT_HEXAGONAL:
      return CHANNEL_LAYOUT_HEXAGONAL;
    case AV_CH_LAYOUT_6POINT1:
      return CHANNEL_LAYOUT_6_1;
    case AV_CH_LAYOUT_6POINT1_BACK:
      return CHANNEL_LAYOUT_6_1_BACK;
    case AV_CH_LAYOUT_6POINT1_FRONT:
      return CHANNEL_LAYOUT_6_1_FRONT;
    case AV_CH_LAYOUT_7POINT0_FRONT:
      return CHANNEL_LAYOUT_7_0_FRONT;
// Only mapped when the FFmpeg headers in use define this layout.
#ifdef AV_CH_LAYOUT_7POINT1_WIDE_BACK
    case AV_CH_LAYOUT_7POINT1_WIDE_BACK:
      return CHANNEL_LAYOUT_7_1_WIDE_BACK;
#endif
    case AV_CH_LAYOUT_OCTAGONAL:
      return CHANNEL_LAYOUT_OCTAGONAL;
    default:
      break;
  }
  // FFmpeg channel_layout is 0 for .wav and .mp3. Attempt to guess layout
  // based on the channel count.
  return GuessChannelLayout(channels);
}
// Maps an FFmpeg PixelFormat onto the corresponding VideoFrame::Format.
// Pixel formats without a mapping are logged via DVLOG(1) and yield
// VideoFrame::UNKNOWN.
VideoFrame::Format PixelFormatToVideoFormat(PixelFormat pixel_format) {
  VideoFrame::Format video_format = VideoFrame::UNKNOWN;
  switch (pixel_format) {
    case PIX_FMT_YUV422P:
      video_format = VideoFrame::YV16;
      break;
    case PIX_FMT_YUV420P:
      video_format = VideoFrame::YV12;
      break;
    case PIX_FMT_YUVJ420P:
      video_format = VideoFrame::YV12J;
      break;
    case PIX_FMT_YUVA420P:
      video_format = VideoFrame::YV12A;
      break;
    default:
      DVLOG(1) << "Unsupported PixelFormat: " << pixel_format;
      break;
  }
  return video_format;
}
// Maps a VideoFrame::Format onto the corresponding FFmpeg PixelFormat.
// Formats without a mapping are logged via DVLOG(1) and yield PIX_FMT_NONE.
PixelFormat VideoFormatToPixelFormat(VideoFrame::Format video_format) {
  switch (video_format) {
    case VideoFrame::YV16:
      return PIX_FMT_YUV422P;
    case VideoFrame::YV12:
      return PIX_FMT_YUV420P;
    case VideoFrame::YV12J:
      return PIX_FMT_YUVJ420P;
    case VideoFrame::YV12A:
      return PIX_FMT_YUVA420P;
    default:
      break;
  }
  DVLOG(1) << "Unsupported VideoFrame::Format: " << video_format;
  return PIX_FMT_NONE;
}
} // namespace media