/*
* Copyright (C) 2009 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
//#define LOG_NDEBUG 0
#define LOG_TAG "MPEG4Extractor"
#include <ctype.h>
#include <inttypes.h>
#include <memory>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <utils/Log.h>
#include "MPEG4Extractor.h"
#include "SampleTable.h"
#include "ItemTable.h"
#include "include/ESDS.h"
#include <media/ExtractorUtils.h>
#include <media/MediaTrack.h>
#include <media/stagefright/foundation/ABitReader.h>
#include <media/stagefright/foundation/ABuffer.h>
#include <media/stagefright/foundation/ADebug.h>
#include <media/stagefright/foundation/AMessage.h>
#include <media/stagefright/foundation/AUtils.h>
#include <media/stagefright/foundation/ByteUtils.h>
#include <media/stagefright/foundation/ColorUtils.h>
#include <media/stagefright/foundation/avc_utils.h>
#include <media/stagefright/foundation/hexdump.h>
#include <media/stagefright/MediaBufferBase.h>
#include <media/stagefright/MediaBufferGroup.h>
#include <media/stagefright/MediaDefs.h>
#include <media/stagefright/MetaData.h>
#include <utils/String8.h>
#include <byteswap.h>
#include "include/ID3.h"
#ifndef UINT32_MAX
#define UINT32_MAX (4294967295U)
#endif
namespace android {
// Size limits used by the extractor.
enum {
    // Upper bound on the number of track header bytes handed back to callers.
    kMaxTrackHeaderSize = 32,

    // Largest atom we are willing to accept (64 MiB). The spec allows some
    // atoms to be bigger, but anything above this limit is rejected.
    kMaxAtomSize = 64 << 20,
};
// MediaTrack implementation declared for use by MPEG4Extractor. Only the
// declaration is part of this excerpt; the member function definitions are
// not visible here, so the notes below describe the declaration only.
class MPEG4Source : public MediaTrack {
public:
// Caller retains ownership of both "dataSource" and "sampleTable".
// NOTE: "format" and "sidx" are held by reference (see mFormat and mSegments),
// so the referenced objects have to outlive this source.
MPEG4Source(MetaDataBase &format,
DataSourceBase *dataSource,
int32_t timeScale,
const sp<SampleTable> &sampleTable,
Vector<SidxEntry> &sidx,
const Trex *trex,
off64_t firstMoofOffset,
const sp<ItemTable> &itemTable);
virtual status_t init();
virtual status_t start(MetaDataBase *params = NULL);
virtual status_t stop();
virtual status_t getFormat(MetaDataBase &);
virtual status_t read(MediaBufferBase **buffer, const ReadOptions *options = NULL);
// Always reports that non-blocking reads are supported.
virtual bool supportNonblockingRead() { return true; }
virtual status_t fragmentedRead(MediaBufferBase **buffer, const ReadOptions *options = NULL);
virtual ~MPEG4Source();
private:
Mutex mLock;
// Reference to the format metadata passed to the constructor (not a copy).
MetaDataBase &mFormat;
// Not owned (see the constructor comment).
DataSourceBase *mDataSource;
int32_t mTimescale;
sp<SampleTable> mSampleTable;
uint32_t mCurrentSampleIndex;
uint32_t mCurrentFragmentIndex;
// Reference to the sidx entry list passed to the constructor (not a copy).
Vector<SidxEntry> &mSegments;
const Trex *mTrex;
// NOTE(review): the moof offsets below presumably track the position within a
// fragmented file -- confirm against the definitions.
off64_t mFirstMoofOffset;
off64_t mCurrentMoofOffset;
off64_t mNextMoofOffset;
uint32_t mCurrentTime;
int32_t mLastParsedTrackId;
int32_t mTrackId;
int32_t mCryptoMode; // passed in from extractor
int32_t mDefaultIVSize; // passed in from extractor
uint8_t mCryptoKey[16]; // passed in from extractor
int32_t mDefaultEncryptedByteBlock;
int32_t mDefaultSkipByteBlock;
// NOTE(review): the mCurrentAuxInfo*/mCurrentSampleInfo* members presumably
// hold what parseSampleAuxiliaryInformationSizes()/Offsets() read -- confirm.
uint32_t mCurrentAuxInfoType;
uint32_t mCurrentAuxInfoTypeParameter;
int32_t mCurrentDefaultSampleInfoSize;
uint32_t mCurrentSampleInfoCount;
uint32_t mCurrentSampleInfoAllocSize;
uint8_t* mCurrentSampleInfoSizes;
uint32_t mCurrentSampleInfoOffsetCount;
uint32_t mCurrentSampleInfoOffsetsAllocSize;
uint64_t* mCurrentSampleInfoOffsets;
bool mIsAVC;
bool mIsHEVC;
size_t mNALLengthSize;
bool mStarted;
MediaBufferGroup *mGroup;
MediaBufferBase *mBuffer;
bool mWantsNALFragments;
uint8_t *mSrcBuffer;
bool mIsHeif;
sp<ItemTable> mItemTable;
// Private parsing helpers; their definitions are outside this excerpt.
size_t parseNALSize(const uint8_t *data) const;
status_t parseChunk(off64_t *offset);
status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
status_t parseTrackFragmentRun(off64_t offset, off64_t size);
status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);
status_t parseClearEncryptedSizes(off64_t offset, bool isSubsampleEncryption, uint32_t flags);
status_t parseSampleEncryption(off64_t offset);
// Values associated with a track fragment header. The Flags enumerators are
// bit masks; each k...Present name refers to the field of the same name below
// (presumably tested against mFlags -- confirm in parseTrackFragmentHeader).
struct TrackFragmentHeaderInfo {
enum Flags {
kBaseDataOffsetPresent = 0x01,
kSampleDescriptionIndexPresent = 0x02,
kDefaultSampleDurationPresent = 0x08,
kDefaultSampleSizePresent = 0x10,
kDefaultSampleFlagsPresent = 0x20,
kDurationIsEmpty = 0x10000,
};
uint32_t mTrackID;
uint32_t mFlags;
uint64_t mBaseDataOffset;
uint32_t mSampleDescriptionIndex;
uint32_t mDefaultSampleDuration;
uint32_t mDefaultSampleSize;
uint32_t mDefaultSampleFlags;
uint64_t mDataOffset;
};
TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;
// Per-sample record: location and size of the sample data, timing
// information, and encryption details (IV plus clear/encrypted byte counts).
struct Sample {
off64_t offset;
size_t size;
uint32_t duration;
int32_t compositionOffset;
uint8_t iv[16];
Vector<size_t> clearsizes;
Vector<size_t> encryptedsizes;
};
Vector<Sample> mCurrentSamples;
// Copy constructor and copy assignment are private and have no definition in
// this excerpt -- presumably to make the class non-copyable.
MPEG4Source(const MPEG4Source &);
MPEG4Source &operator=(const MPEG4Source &);
};
// This custom data source wraps an existing one. Requests that fall entirely
// within the cached range are served from the cache; all other requests are
// forwarded to the wrapped data source.
// It is used to cache the full sample table metadata of a single track. To
// cover several tracks the wrappers are chained, i.e. each
// CachedRangedDataSource caches the sample table metadata for one track and
// wraps the data source that was in use before it.
struct CachedRangedDataSource : public DataSourceBase {
// Wraps "source"; ownership is not taken at construction time.
explicit CachedRangedDataSource(DataSourceBase *source);
// Frees the cache and deletes the wrapped source if ownership was assumed.
virtual ~CachedRangedDataSource();
// initCheck(), getSize() and flags() forward to the wrapped source.
virtual status_t initCheck() const;
virtual ssize_t readAt(off64_t offset, void *data, size_t size);
virtual status_t getSize(off64_t *size);
virtual uint32_t flags();
// Replaces the cache with "size" bytes read from the wrapped source at
// "offset". Returns -ENOMEM if the buffer cannot be allocated, ERROR_IO if
// the read comes up short (the cache is then left empty), OK otherwise.
// On success the ownership flag for the wrapped source is set to
// "assumeSourceOwnershipOnSuccess".
status_t setCachedRange(off64_t offset, size_t size, bool assumeSourceOwnershipOnSuccess);
private:
// Taken by readAt() and setCachedRange().
Mutex mLock;
DataSourceBase *mSource;
// When true the destructor deletes mSource.
bool mOwnsDataSource;
off64_t mCachedOffset;
size_t mCachedSize;
// malloc()'ed buffer holding the cached range, or NULL when nothing is cached.
uint8_t *mCache;
void clearCache();
// Copy constructor and copy assignment are private and have no definition in
// this excerpt -- presumably to make the type non-copyable.
CachedRangedDataSource(const CachedRangedDataSource &);
CachedRangedDataSource &operator=(const CachedRangedDataSource &);
};
// Wraps |source| without taking ownership of it. The cache starts out empty.
CachedRangedDataSource::CachedRangedDataSource(DataSourceBase *source)
    : mSource(source),
      mOwnsDataSource(false),
      mCachedOffset(0),
      mCachedSize(0),
      mCache(nullptr) {}
// Releases the cache and, if ownership was assumed in setCachedRange(),
// destroys the wrapped source as well.
CachedRangedDataSource::~CachedRangedDataSource() {
    clearCache();

    if (!mOwnsDataSource) {
        return;
    }

    // An owned source is deleted through the CachedRangedDataSource type.
    delete static_cast<CachedRangedDataSource *>(mSource);
}
// Drops the cached bytes and resets the cached range to "empty".
void CachedRangedDataSource::clearCache() {
    // free() accepts a null pointer, so no separate check is needed.
    free(mCache);
    mCache = nullptr;

    mCachedSize = 0;
    mCachedOffset = 0;
}
// The wrapper has no state of its own to validate; report the wrapped
// source's status.
status_t CachedRangedDataSource::initCheck() const {
    const status_t wrappedStatus = mSource->initCheck();
    return wrappedStatus;
}
// Reads |size| bytes at |offset| into |data|. A request lying completely
// inside the cached range is copied out of the cache; anything else goes to
// the wrapped source.
ssize_t CachedRangedDataSource::readAt(off64_t offset, void *data, size_t size) {
    Mutex::Autolock autoLock(mLock);

    if (!isInRange(mCachedOffset, mCachedSize, offset, size)) {
        return mSource->readAt(offset, data, size);
    }

    const uint8_t *cached = mCache + (offset - mCachedOffset);
    memcpy(data, cached, size);
    return size;
}
// The size is that of the wrapped source; the cache does not affect it.
status_t CachedRangedDataSource::getSize(off64_t *size) {
    DataSourceBase *const wrapped = mSource;
    return wrapped->getSize(size);
}
// Reports the flags of the wrapped source unchanged.
uint32_t CachedRangedDataSource::flags() {
    const uint32_t wrappedFlags = mSource->flags();
    return wrappedFlags;
}
// Replaces the current cache with |size| bytes read from the wrapped source
// at |offset|.
//
// Returns -ENOMEM if the buffer cannot be allocated and ERROR_IO if fewer
// than |size| bytes could be read (the cache is left empty in that case).
// On success the ownership flag for the wrapped source is set to
// |assumeSourceOwnershipOnSuccess| and OK is returned.
status_t CachedRangedDataSource::setCachedRange(off64_t offset,
                                                size_t size,
                                                bool assumeSourceOwnershipOnSuccess) {
    Mutex::Autolock autoLock(mLock);

    clearCache();

    mCache = static_cast<uint8_t *>(malloc(size));
    if (mCache == nullptr) {
        return -ENOMEM;
    }

    mCachedOffset = offset;
    mCachedSize = size;

    const ssize_t bytesRead = mSource->readAt(mCachedOffset, mCache, mCachedSize);
    if (bytesRead >= (ssize_t)size) {
        mOwnsDataSource = assumeSourceOwnershipOnSuccess;
        return OK;
    }

    // Short read: do not keep a partially filled cache around.
    clearCache();
    return ERROR_IO;
}
////////////////////////////////////////////////////////////////////////////////
// Debug switch: when true, MPEG4Extractor::parseChunk() prints every chunk it
// encounters and hex-dumps up to the first 256 bytes of it.
static const bool kUseHexDump = false;
// Translates a four-character code into the matching MIME type string.
// Codes that are not recognized are logged and mapped to
// "application/octet-stream".
static const char *FourCC2MIME(uint32_t fourcc) {
    switch (fourcc) {
        // Audio
        case FOURCC('m', 'p', '4', 'a'):
            return MEDIA_MIMETYPE_AUDIO_AAC;
        case FOURCC('s', 'a', 'm', 'r'):
            return MEDIA_MIMETYPE_AUDIO_AMR_NB;
        case FOURCC('s', 'a', 'w', 'b'):
            return MEDIA_MIMETYPE_AUDIO_AMR_WB;

        // Video
        case FOURCC('m', 'p', '4', 'v'):
            return MEDIA_MIMETYPE_VIDEO_MPEG4;
        case FOURCC('s', '2', '6', '3'):
        case FOURCC('h', '2', '6', '3'):
        case FOURCC('H', '2', '6', '3'):
            return MEDIA_MIMETYPE_VIDEO_H263;
        case FOURCC('a', 'v', 'c', '1'):
            return MEDIA_MIMETYPE_VIDEO_AVC;
        case FOURCC('h', 'v', 'c', '1'):
        case FOURCC('h', 'e', 'v', '1'):
            return MEDIA_MIMETYPE_VIDEO_HEVC;

        default:
            break;
    }

    // Split the code into its four bytes, most significant first, for logging.
    const uint32_t c0 = (fourcc >> 24) & 0xff;
    const uint32_t c1 = (fourcc >> 16) & 0xff;
    const uint32_t c2 = (fourcc >> 8) & 0xff;
    const uint32_t c3 = fourcc & 0xff;
    ALOGW("Unknown fourcc: %c%c%c%c", c0, c1, c2, c3);
    return "application/octet-stream";
}
// Overrides the channel count and sample rate for codecs whose parameters are
// fixed by definition.
//
// Returns true and fills in |channels| and |rate| when |fourcc| maps to AMR-NB
// or AMR-WB; returns false and leaves both untouched otherwise.
static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) {
    // Resolve the MIME type once: FourCC2MIME() logs a warning for every
    // unknown fourcc, so looking it up per comparison duplicates that warning.
    const char *mime = FourCC2MIME(fourcc);

    if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, mime)) {
        // AMR NB audio is always mono, 8kHz
        *channels = 1;
        *rate = 8000;
        return true;
    }

    if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, mime)) {
        // AMR WB audio is always mono, 16kHz
        *channels = 1;
        *rate = 16000;
        return true;
    }

    return false;
}
// Creates an extractor reading from |source|. Nothing is parsed here; the
// init status stays NO_INIT until readMetaData() has run.
//
// |mime| may be NULL. HEIF handling is preferred only when |mime| equals
// MEDIA_MIMETYPE_CONTAINER_HEIF (case-insensitively).
MPEG4Extractor::MPEG4Extractor(DataSourceBase *source, const char *mime)
    : mMoofOffset(0),
      mMoofFound(false),
      mMdatFound(false),
      mDataSource(source),
      mCachedSource(NULL),
      mInitCheck(NO_INIT),
      mHeaderTimescale(0),
      mIsQT(false),
      mIsHeif(false),
      mHasMoovBox(false),
      mPreferHeif(mime != NULL && !strcasecmp(mime, MEDIA_MIMETYPE_CONTAINER_HEIF)),
      mFirstTrack(NULL),
      mLastTrack(NULL) {
    // |mime| is allowed to be NULL; never hand a null pointer to "%s".
    ALOGV("mime=%s, mPreferHeif=%d", mime != NULL ? mime : "(null)", mPreferHeif);
}
// Tears down everything the extractor owns: the track list, the pssh
// payloads and the cached data source wrapper (if any).
MPEG4Extractor::~MPEG4Extractor() {
    for (Track *track = mFirstTrack; track != nullptr; ) {
        // Remember the successor before the node goes away.
        Track *following = track->next;
        delete track;
        track = following;
    }
    mFirstTrack = nullptr;
    mLastTrack = nullptr;

    for (size_t i = 0; i < mPssh.size(); ++i) {
        delete [] mPssh[i].data;
    }
    mPssh.clear();

    delete mCachedSource;
}
// Pausing is always possible. Seeking is advertised only when no moof offset
// has been recorded or when sidx entries are available.
uint32_t MPEG4Extractor::flags() const {
    uint32_t result = CAN_PAUSE;

    const bool seekable = (mMoofOffset == 0) || (mSidxEntries.size() != 0);
    if (seekable) {
        result |= CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK;
    }
    return result;
}
// Copies the file-level metadata into |meta|.
// Any failure to read the metadata is reported as UNKNOWN_ERROR.
status_t MPEG4Extractor::getMetaData(MetaDataBase &meta) {
    if (readMetaData() != OK) {
        return UNKNOWN_ERROR;
    }

    meta = mFileMetaData;
    return OK;
}
size_t MPEG4Extractor::countTracks() {
status_t err;
if ((err = readMetaData()) != OK) {
ALOGV("MPEG4Extractor::countTracks: no tracks");
return 0;
}
size_t n = 0;
Track *track = mFirstTrack;
while (track) {
++n;
track = track->next;
}
ALOGV("MPEG4Extractor::countTracks: %zu tracks", n);
return n;
}
// Copies the metadata of the track at position |index| (0-based) of the track
// list into |meta|.
//
// Side effects on the stored track metadata:
//  - If the track has a pending edit list (has_elst), the encoder delay and
//    padding (in samples) are derived from it once and has_elst is cleared.
//  - If |flags| contains kIncludeExtensiveMetaData, video tracks get a
//    thumbnail time (and MPEG2 tracks a stream header) the first time this is
//    requested for the track.
//
// Returns UNKNOWN_ERROR when the file metadata cannot be read or |index| is
// out of range, OK otherwise.
status_t MPEG4Extractor::getTrackMetaData(
        MetaDataBase &meta,
        size_t index, uint32_t flags) {
    if (readMetaData() != OK) {
        return UNKNOWN_ERROR;
    }

    Track *track = mFirstTrack;
    while (index > 0) {
        if (track == NULL) {
            return UNKNOWN_ERROR;
        }

        track = track->next;
        --index;
    }

    if (track == NULL) {
        return UNKNOWN_ERROR;
    }

    // Wrapped in an immediately-invoked lambda so that each failed overflow
    // check can simply "return" and skip the remaining edit list processing
    // without leaving getTrackMetaData() itself.
    [this, track] {
        int64_t duration;
        int32_t samplerate;
        if (track->has_elst && mHeaderTimescale != 0 &&
                track->meta.findInt64(kKeyDuration, &duration) &&
                track->meta.findInt32(kKeySampleRate, &samplerate)) {

            // Only attempt the conversion once per track.
            track->has_elst = false;

            if (track->elst_segment_duration > INT64_MAX) {
                return;
            }
            int64_t segment_duration = track->elst_segment_duration;
            int64_t media_time = track->elst_media_time;
            int64_t halfscale = mHeaderTimescale / 2;
            ALOGV("segment_duration = %" PRId64 ", media_time = %" PRId64
                  ", halfscale = %" PRId64 ", timescale = %d",
                  segment_duration,
                  media_time,
                  halfscale,
                  mHeaderTimescale);

            int64_t delay;
            // delay = ((media_time * samplerate) + halfscale) / mHeaderTimescale;
            // The "(x /= y, false)" term performs the division inside the
            // condition chain without ever making the condition true.
            if (__builtin_mul_overflow(media_time, samplerate, &delay) ||
                    __builtin_add_overflow(delay, halfscale, &delay) ||
                    (delay /= mHeaderTimescale, false) ||
                    delay > INT32_MAX ||
                    delay < INT32_MIN) {
                return;
            }
            ALOGV("delay = %" PRId64, delay);
            track->meta.setInt32(kKeyEncoderDelay, delay);

            int64_t scaled_duration;
            // scaled_duration = duration * mHeaderTimescale;
            if (__builtin_mul_overflow(duration, mHeaderTimescale, &scaled_duration)) {
                return;
            }
            ALOGV("scaled_duration = %" PRId64, scaled_duration);

            int64_t segment_end;
            int64_t padding;
            // padding = scaled_duration - ((segment_duration + media_time) * 1000000);
            if (__builtin_add_overflow(segment_duration, media_time, &segment_end) ||
                    __builtin_mul_overflow(segment_end, 1000000, &segment_end) ||
                    __builtin_sub_overflow(scaled_duration, segment_end, &padding)) {
                return;
            }
            ALOGV("segment_end = %" PRId64 ", padding = %" PRId64, segment_end, padding);

            if (padding < 0) {
                // track duration from media header (which is what kKeyDuration is) might
                // be slightly shorter than the segment duration, which would make the
                // padding negative. Clamp to zero.
                padding = 0;
            }

            int64_t paddingsamples;
            int64_t halfscale_e6;
            int64_t timescale_e6;
            // paddingsamples = ((padding * samplerate) + (halfscale * 1000000))
            //                / (mHeaderTimescale * 1000000);
            if (__builtin_mul_overflow(padding, samplerate, &paddingsamples) ||
                    __builtin_mul_overflow(halfscale, 1000000, &halfscale_e6) ||
                    __builtin_mul_overflow(mHeaderTimescale, 1000000, &timescale_e6) ||
                    __builtin_add_overflow(paddingsamples, halfscale_e6, &paddingsamples) ||
                    (paddingsamples /= timescale_e6, false) ||
                    paddingsamples > INT32_MAX) {
                return;
            }
            ALOGV("paddingsamples = %" PRId64, paddingsamples);
            track->meta.setInt32(kKeyEncoderPadding, paddingsamples);
        }
    }();

    if ((flags & kIncludeExtensiveMetaData)
            && !track->includes_expensive_metadata) {
        // Compute the expensive entries at most once per track.
        track->includes_expensive_metadata = true;

        const char *mime;
        CHECK(track->meta.findCString(kKeyMIMEType, &mime));
        if (!strncasecmp("video/", mime, 6)) {
            // MPEG2 tracks do not provide CSD, so read the stream header
            if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)) {
                off64_t offset;
                size_t size;
                if (track->sampleTable->getMetaDataForSample(
                            0 /* sampleIndex */, &offset, &size, NULL /* sampleTime */) == OK) {
                    // Never read more than the header buffer can hold.
                    if (size > kMaxTrackHeaderSize) {
                        size = kMaxTrackHeaderSize;
                    }
                    uint8_t header[kMaxTrackHeaderSize];
                    if (mDataSource->readAt(offset, &header, size) == (ssize_t)size) {
                        track->meta.setData(kKeyStreamHeader, 'mdat', header, size);
                    }
                }
            }

            if (mMoofOffset > 0) {
                int64_t duration;
                if (track->meta.findInt64(kKeyDuration, &duration)) {
                    // nothing fancy, just pick a frame near 1/4th of the duration
                    track->meta.setInt64(
                            kKeyThumbnailTime, duration / 4);
                }
            } else {
                uint32_t sampleIndex;
                uint32_t sampleTime;
                if (track->timescale != 0 &&
                        track->sampleTable->findThumbnailSample(&sampleIndex) == OK
                        && track->sampleTable->getMetaDataForSample(
                            sampleIndex, NULL /* offset */, NULL /* size */,
                            &sampleTime) == OK) {
                    // Convert from track timescale units to microseconds.
                    track->meta.setInt64(
                            kKeyThumbnailTime,
                            ((int64_t)sampleTime * 1000000) / track->timescale);
                }
            }
        }
    }

    meta = track->meta;
    return OK;
}
// Parses the top-level chunks of the file, builds the track list and fills in
// the file-level metadata (container MIME type, EXIF location for HEIF, pssh
// data).
//
// The result is cached in mInitCheck: once it is no longer NO_INIT, later
// calls return it without parsing again.
//
// Returns OK on success, NO_MEMORY if the pssh buffer cannot be allocated, or
// the error recorded in mInitCheck.
status_t MPEG4Extractor::readMetaData() {
if (mInitCheck != NO_INIT) {
return mInitCheck;
}
off64_t offset = 0;
status_t err;
bool sawMoovOrSidx = false;
// Keep parsing top-level chunks until either
//  - a moov box was seen, parseChunk() signalled it via UNKNOWN_ERROR, and an
//    mdat or moof was found, or
//  - the file is HEIF with a valid item table, and HEIF is preferred or there
//    is no moov box.
while (!((mHasMoovBox && sawMoovOrSidx && (mMdatFound || mMoofFound)) ||
(mIsHeif && (mPreferHeif || !mHasMoovBox) &&
(mItemTable != NULL) && mItemTable->isValid()))) {
off64_t orig_offset = offset;
err = parseChunk(&offset, 0);
// UNKNOWN_ERROR is not a failure here: parseChunk() returns it as a dummy
// error once a moov box has been parsed (judging by the flag name, possibly
// for sidx too -- not visible in this excerpt).
if (err != OK && err != UNKNOWN_ERROR) {
break;
} else if (offset <= orig_offset) {
// only continue parsing if the offset was advanced,
// otherwise we might end up in an infinite loop
ALOGE("did not advance: %lld->%lld", (long long)orig_offset, (long long)offset);
err = ERROR_MALFORMED;
break;
} else if (err == UNKNOWN_ERROR) {
sawMoovOrSidx = true;
}
}
// For HEIF files, record where the EXIF data lives and append one track per
// still image to the track list.
if (mIsHeif && (mItemTable != NULL) && (mItemTable->countImages() > 0)) {
off64_t exifOffset;
size_t exifSize;
if (mItemTable->getExifOffsetAndSize(&exifOffset, &exifSize) == OK) {
mFileMetaData.setInt64(kKeyExifOffset, (int64_t)exifOffset);
mFileMetaData.setInt64(kKeyExifSize, (int64_t)exifSize);
}
for (uint32_t imageIndex = 0;
imageIndex < mItemTable->countImages(); imageIndex++) {
sp<MetaData> meta = mItemTable->getImageMeta(imageIndex);
if (meta == NULL) {
ALOGE("heif image %u has no meta!", imageIndex);
continue;
}
// Some heif files advertise image sequence brands (eg. 'hevc') in
// ftyp box, but don't have any valid tracks in them. Instead of
// reporting the entire file as malformed, we override the error
// to allow still images to be extracted.
if (err != OK) {
ALOGW("Extracting still images only");
err = OK;
}
mInitCheck = OK;
ALOGV("adding HEIF image track %u", imageIndex);
// Append the new image track to the end of the singly linked track list.
Track *track = new Track;
track->next = NULL;
if (mLastTrack != NULL) {
mLastTrack->next = track;
} else {
mFirstTrack = track;
}
mLastTrack = track;
track->meta = *(meta.get());
// The image index doubles as the track id.
track->meta.setInt32(kKeyTrackID, imageIndex);
track->includes_expensive_metadata = false;
track->skipTrack = false;
track->timescale = 1000000;
}
}
// Pick the container MIME type from the kind of tracks found: video takes
// precedence over audio, which takes precedence over HEIC images.
if (mInitCheck == OK) {
if (findTrackByMimePrefix("video/") != NULL) {
mFileMetaData.setCString(
kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4);
} else if (findTrackByMimePrefix("audio/") != NULL) {
mFileMetaData.setCString(kKeyMIMEType, "audio/mp4");
} else if (findTrackByMimePrefix(
MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC) != NULL) {
mFileMetaData.setCString(
kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_HEIF);
} else {
mFileMetaData.setCString(kKeyMIMEType, "application/octet-stream");
}
} else {
// Parsing did not succeed; remember the error for later calls.
mInitCheck = err;
}
CHECK_NE(err, (status_t)NO_INIT);
// copy pssh data into file metadata
// Each entry contributes a 20 byte header (uuid + length) followed by its
// data; all entries are concatenated into a single blob.
uint64_t psshsize = 0;
for (size_t i = 0; i < mPssh.size(); i++) {
psshsize += 20 + mPssh[i].datalen;
}
if (psshsize > 0 && psshsize <= UINT32_MAX) {
char *buf = (char*)malloc(psshsize);
if (!buf) {
ALOGE("b/28471206");
return NO_MEMORY;
}
char *ptr = buf;
for (size_t i = 0; i < mPssh.size(); i++) {
memcpy(ptr, mPssh[i].uuid, 20); // uuid + length
memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen);
ptr += (20 + mPssh[i].datalen);
}
mFileMetaData.setData(kKeyPssh, 'pssh', buf, psshsize);
free(buf);
}
return mInitCheck;
}
// Scope helper for the chunk path: pushes |chunkType| onto |path| when
// constructed and pops the last element again when destroyed.
struct PathAdder {
    PathAdder(Vector<uint32_t> *path, uint32_t chunkType)
        : mPath(path) {
        mPath->push(chunkType);
    }

    ~PathAdder() { mPath->pop(); }

private:
    // Not copyable: a copy would pop the path a second time.
    PathAdder(const PathAdder &) = delete;
    PathAdder &operator=(const PathAdder &) = delete;

    Vector<uint32_t> *mPath;
};
// True when |path| has at least five elements and starts with
// moov/udta/meta/ilst.
static bool underMetaDataPath(const Vector<uint32_t> &path) {
    if (path.size() < 5) {
        return false;
    }

    return path[0] == FOURCC('m', 'o', 'o', 'v')
            && path[1] == FOURCC('u', 'd', 't', 'a')
            && path[2] == FOURCC('m', 'e', 't', 'a')
            && path[3] == FOURCC('i', 'l', 's', 't');
}
// True when |path| starts with moov/meta and either |depth| is 2, or |depth|
// is 3 and the third path element is one of hdlr, ilst or keys.
static bool underQTMetaPath(const Vector<uint32_t> &path, int32_t depth) {
    if (path.size() < 2
            || path[0] != FOURCC('m', 'o', 'o', 'v')
            || path[1] != FOURCC('m', 'e', 't', 'a')) {
        return false;
    }

    if (depth == 2) {
        return true;
    }
    if (depth != 3) {
        return false;
    }

    const uint32_t child = path[2];
    return child == FOURCC('h', 'd', 'l', 'r')
            || child == FOURCC('i', 'l', 's', 't')
            || child == FOURCC('k', 'e', 'y', 's');
}
// Given a time in seconds since Jan 1 1904 (UTC), produce a human-readable
// string of the form "YYYYMMDDTHHMMSS.000Z" in |s|.
//
// Returns true and sets |s| on success. Returns false, leaving |s| untouched,
// when the time cannot be represented (underflow, or it does not fit this
// platform's time_t) or cannot be formatted.
static bool convertTimeToDate(int64_t time_1904, String8 *s) {
    // delta between mpeg4 time and unix epoch time
    static const int64_t delta = (((66 * 365 + 17) * 24) * 3600);
    if (time_1904 < INT64_MIN + delta) {
        return false;
    }

    const int64_t unixSeconds = time_1904 - delta;
    const time_t time_1970 = static_cast<time_t>(unixSeconds);
    if (static_cast<int64_t>(time_1970) != unixSeconds) {
        // time_t is narrower than 64 bits here and the value did not survive
        // the conversion; formatting it would yield a bogus date.
        return false;
    }

    // gmtime_r() writes into a caller-provided struct instead of the static
    // storage used by gmtime(), so concurrent callers cannot clobber each
    // other's result.
    struct tm utc;
    char tmp[32];
    if (gmtime_r(&time_1970, &utc) != NULL &&
            strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", &utc) > 0) {
        s->setTo(tmp);
        return true;
    }
    return false;
}
status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
ALOGV("entering parseChunk %lld/%d", (long long)*offset, depth);
if (*offset < 0) {
ALOGE("b/23540914");
return ERROR_MALFORMED;
}
if (depth > 100) {
ALOGE("b/27456299");
return ERROR_MALFORMED;
}
uint32_t hdr[2];
if (mDataSource->readAt(*offset, hdr, 8) < 8) {
return ERROR_IO;
}
uint64_t chunk_size = ntohl(hdr[0]);
int32_t chunk_type = ntohl(hdr[1]);
off64_t data_offset = *offset + 8;
if (chunk_size == 1) {
if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
return ERROR_IO;
}
chunk_size = ntoh64(chunk_size);
data_offset += 8;
if (chunk_size < 16) {
// The smallest valid chunk is 16 bytes long in this case.
return ERROR_MALFORMED;
}
} else if (chunk_size == 0) {
if (depth == 0) {
// atom extends to end of file
off64_t sourceSize;
if (mDataSource->getSize(&sourceSize) == OK) {
chunk_size = (sourceSize - *offset);
} else {
// XXX could we just pick a "sufficiently large" value here?
ALOGE("atom size is 0, and data source has no size");
return ERROR_MALFORMED;
}
} else {
// not allowed for non-toplevel atoms, skip it
*offset += 4;
return OK;
}
} else if (chunk_size < 8) {
// The smallest valid chunk is 8 bytes long.
ALOGE("invalid chunk size: %" PRIu64, chunk_size);
return ERROR_MALFORMED;
}
char chunk[5];
MakeFourCCString(chunk_type, chunk);
ALOGV("chunk: %s @ %lld, %d", chunk, (long long)*offset, depth);
if (kUseHexDump) {
static const char kWhitespace[] = " ";
const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth];
printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size);
char buffer[256];
size_t n = chunk_size;
if (n > sizeof(buffer)) {
n = sizeof(buffer);
}
if (mDataSource->readAt(*offset, buffer, n)
< (ssize_t)n) {
return ERROR_IO;
}
hexdump(buffer, n);
}
PathAdder autoAdder(&mPath, chunk_type);
// (data_offset - *offset) is either 8 or 16
off64_t chunk_data_size = chunk_size - (data_offset - *offset);
if (chunk_data_size < 0) {
ALOGE("b/23540914");
return ERROR_MALFORMED;
}
if (chunk_type != FOURCC('m', 'd', 'a', 't') && chunk_data_size > kMaxAtomSize) {
char errMsg[100];
sprintf(errMsg, "%s atom has size %" PRId64, chunk, chunk_data_size);
ALOGE("%s (b/28615448)", errMsg);
android_errorWriteWithInfoLog(0x534e4554, "28615448", -1, errMsg, strlen(errMsg));
return ERROR_MALFORMED;
}
if (chunk_type != FOURCC('c', 'p', 'r', 't')
&& chunk_type != FOURCC('c', 'o', 'v', 'r')
&& mPath.size() == 5 && underMetaDataPath(mPath)) {
off64_t stop_offset = *offset + chunk_size;
*offset = data_offset;
while (*offset < stop_offset) {
status_t err = parseChunk(offset, depth + 1);
if (err != OK) {
return err;
}
}
if (*offset != stop_offset) {
return ERROR_MALFORMED;
}
return OK;
}
switch(chunk_type) {
case FOURCC('m', 'o', 'o', 'v'):
case FOURCC('t', 'r', 'a', 'k'):
case FOURCC('m', 'd', 'i', 'a'):
case FOURCC('m', 'i', 'n', 'f'):
case FOURCC('d', 'i', 'n', 'f'):
case FOURCC('s', 't', 'b', 'l'):
case FOURCC('m', 'v', 'e', 'x'):
case FOURCC('m', 'o', 'o', 'f'):
case FOURCC('t', 'r', 'a', 'f'):
case FOURCC('m', 'f', 'r', 'a'):
case FOURCC('u', 'd', 't', 'a'):
case FOURCC('i', 'l', 's', 't'):
case FOURCC('s', 'i', 'n', 'f'):
case FOURCC('s', 'c', 'h', 'i'):
case FOURCC('e', 'd', 't', 's'):
case FOURCC('w', 'a', 'v', 'e'):
{
if (chunk_type == FOURCC('m', 'o', 'o', 'v') && depth != 0) {
ALOGE("moov: depth %d", depth);
return ERROR_MALFORMED;
}
if (chunk_type == FOURCC('m', 'o', 'o', 'v') && mInitCheck == OK) {
ALOGE("duplicate moov");
return ERROR_MALFORMED;
}
if (chunk_type == FOURCC('m', 'o', 'o', 'f') && !mMoofFound) {
// store the offset of the first segment
mMoofFound = true;
mMoofOffset = *offset;
}
if (chunk_type == FOURCC('s', 't', 'b', 'l')) {
ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size);
if (mDataSource->flags()
& (DataSourceBase::kWantsPrefetching
| DataSourceBase::kIsCachingDataSource)) {
CachedRangedDataSource *cachedSource =
new CachedRangedDataSource(mDataSource);
if (cachedSource->setCachedRange(
*offset, chunk_size,
mCachedSource != NULL /* assume ownership on success */) == OK) {
mDataSource = mCachedSource = cachedSource;
} else {
delete cachedSource;
}
}
if (mLastTrack == NULL) {
return ERROR_MALFORMED;
}
mLastTrack->sampleTable = new SampleTable(mDataSource);
}
bool isTrack = false;
if (chunk_type == FOURCC('t', 'r', 'a', 'k')) {
if (depth != 1) {
ALOGE("trak: depth %d", depth);
return ERROR_MALFORMED;
}
isTrack = true;
ALOGV("adding new track");
Track *track = new Track;
track->next = NULL;
if (mLastTrack) {
mLastTrack->next = track;
} else {
mFirstTrack = track;
}
mLastTrack = track;
track->includes_expensive_metadata = false;
track->skipTrack = false;
track->timescale = 0;
track->meta.setCString(kKeyMIMEType, "application/octet-stream");
track->has_elst = false;
track->subsample_encryption = false;
}
off64_t stop_offset = *offset + chunk_size;
*offset = data_offset;
while (*offset < stop_offset) {
status_t err = parseChunk(offset, depth + 1);
if (err != OK) {
if (isTrack) {
mLastTrack->skipTrack = true;
break;
}
return err;
}
}
if (*offset != stop_offset) {
return ERROR_MALFORMED;
}
if (isTrack) {
int32_t trackId;
// There must be exact one track header per track.
if (!mLastTrack->meta.findInt32(kKeyTrackID, &trackId)) {
mLastTrack->skipTrack = true;
}
status_t err = verifyTrack(mLastTrack);
if (err != OK) {
mLastTrack->skipTrack = true;
}
if (mLastTrack->skipTrack) {
ALOGV("skipping this track...");
Track *cur = mFirstTrack;
if (cur == mLastTrack) {
delete cur;
mFirstTrack = mLastTrack = NULL;
} else {
while (cur && cur->next != mLastTrack) {
cur = cur->next;
}
if (cur) {
cur->next = NULL;
}
delete mLastTrack;
mLastTrack = cur;
}
return OK;
}
} else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) {
mInitCheck = OK;
return UNKNOWN_ERROR; // Return a dummy error.
}
break;
}
case FOURCC('s', 'c', 'h', 'm'):
{
*offset += chunk_size;
if (!mLastTrack) {
return ERROR_MALFORMED;
}
uint32_t scheme_type;
if (mDataSource->readAt(data_offset + 4, &scheme_type, 4) < 4) {
return ERROR_IO;
}
scheme_type = ntohl(scheme_type);
int32_t mode = kCryptoModeUnencrypted;
switch(scheme_type) {
case FOURCC('c', 'b', 'c', '1'):
{
mode = kCryptoModeAesCbc;
break;
}
case FOURCC('c', 'b', 'c', 's'):
{
mode = kCryptoModeAesCbc;
mLastTrack->subsample_encryption = true;
break;
}
case FOURCC('c', 'e', 'n', 'c'):
{
mode = kCryptoModeAesCtr;
break;
}
case FOURCC('c', 'e', 'n', 's'):
{
mode = kCryptoModeAesCtr;
mLastTrack->subsample_encryption = true;
break;
}
}
if (mode != kCryptoModeUnencrypted) {
mLastTrack->meta.setInt32(kKeyCryptoMode, mode);
}
break;
}
case FOURCC('e', 'l', 's', 't'):
{
*offset += chunk_size;
if (!mLastTrack) {
return ERROR_MALFORMED;
}
// See 14496-12 8.6.6
uint8_t version;
if (mDataSource->readAt(data_offset, &version, 1) < 1) {
return ERROR_IO;
}
uint32_t entry_count;
if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) {
return ERROR_IO;
}
if (entry_count != 1) {
// we only support a single entry at the moment, for gapless playback
ALOGW("ignoring edit list with %d entries", entry_count);
} else {
off64_t entriesoffset = data_offset + 8;
uint64_t segment_duration;
int64_t media_time;
if (version == 1) {
if (!mDataSource->getUInt64(entriesoffset, &segment_duration) ||
!mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) {
return ERROR_IO;
}
} else if (version == 0) {
uint32_t sd;
int32_t mt;
if (!mDataSource->getUInt32(entriesoffset, &sd) ||
!mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) {
return ERROR_IO;
}
segment_duration = sd;
media_time = mt;
} else {
return ERROR_IO;
}
// save these for later, because the elst atom might precede
// the atoms that actually gives us the duration and sample rate
// needed to calculate the padding and delay values
mLastTrack->has_elst = true;
mLastTrack->elst_media_time = media_time;
mLastTrack->elst_segment_duration = segment_duration;
}
break;
}
case FOURCC('f', 'r', 'm', 'a'):
{
*offset += chunk_size;
uint32_t original_fourcc;
if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) {
return ERROR_IO;
}
original_fourcc = ntohl(original_fourcc);
ALOGV("read original format: %d", original_fourcc);
if (mLastTrack == NULL) {
return ERROR_MALFORMED;
}
mLastTrack->meta.setCString(kKeyMIMEType, FourCC2MIME(original_fourcc));
uint32_t num_channels = 0;
uint32_t sample_rate = 0;
if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) {
mLastTrack->meta.setInt32(kKeyChannelCount, num_channels);
mLastTrack->meta.setInt32(kKeySampleRate, sample_rate);
}
break;
}
case FOURCC('t', 'e', 'n', 'c'):
{
*offset += chunk_size;
if (chunk_size < 32) {
return ERROR_MALFORMED;
}
// tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte
// default IV size, 16 bytes default KeyID
// (ISO 23001-7)
uint8_t version;
if (mDataSource->readAt(data_offset, &version, sizeof(version))
< (ssize_t)sizeof(version)) {
return ERROR_IO;
}
uint8_t buf[4];
memset(buf, 0, 4);
if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) {
return ERROR_IO;
}
if (mLastTrack == NULL) {
return ERROR_MALFORMED;
}
uint8_t defaultEncryptedByteBlock = 0;
uint8_t defaultSkipByteBlock = 0;
uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf));
if (version == 1) {
uint32_t pattern = buf[2];
defaultEncryptedByteBlock = pattern >> 4;
defaultSkipByteBlock = pattern & 0xf;
if (defaultEncryptedByteBlock == 0 && defaultSkipByteBlock == 0) {
// use (1,0) to mean "encrypt everything"
defaultEncryptedByteBlock = 1;
}
} else if (mLastTrack->subsample_encryption) {
ALOGW("subsample_encryption should be version 1");
} else if (defaultAlgorithmId > 1) {
// only 0 (clear) and 1 (AES-128) are valid
ALOGW("defaultAlgorithmId: %u is a reserved value", defaultAlgorithmId);
defaultAlgorithmId = 1;
}
memset(buf, 0, 4);
if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) {
return ERROR_IO;
}
uint32_t defaultIVSize = ntohl(*((int32_t*)buf));
if (defaultAlgorithmId == 0 && defaultIVSize != 0) {
// only unencrypted data must have 0 IV size
return ERROR_MALFORMED;
} else if (defaultIVSize != 0 &&
defaultIVSize != 8 &&
defaultIVSize != 16) {
return ERROR_MALFORMED;
}
uint8_t defaultKeyId[16];
if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) {
return ERROR_IO;
}
sp<ABuffer> defaultConstantIv;
if (defaultAlgorithmId != 0 && defaultIVSize == 0) {
uint8_t ivlength;
if (mDataSource->readAt(data_offset + 24, &ivlength, sizeof(ivlength))
< (ssize_t)sizeof(ivlength)) {
return ERROR_IO;
}
if (ivlength != 8 && ivlength != 16) {
ALOGW("unsupported IV length: %u", ivlength);
return ERROR_MALFORMED;
}
defaultConstantIv = new ABuffer(ivlength);
if (mDataSource->readAt(data_offset + 25, defaultConstantIv->data(), ivlength)
< (ssize_t)ivlength) {
return ERROR_IO;
}
defaultConstantIv->setRange(0, ivlength);
}
int32_t tmpAlgorithmId;
if (!mLastTrack->meta.findInt32(kKeyCryptoMode, &tmpAlgorithmId)) {
mLastTrack->meta.setInt32(kKeyCryptoMode, defaultAlgorithmId);
}
mLastTrack->meta.setInt32(kKeyCryptoDefaultIVSize, defaultIVSize);
mLastTrack->meta.setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16);
mLastTrack->meta.setInt32(kKeyEncryptedByteBlock, defaultEncryptedByteBlock);
mLastTrack->meta.setInt32(kKeySkipByteBlock, defaultSkipByteBlock);
if (defaultConstantIv != NULL) {
mLastTrack->meta.setData(kKeyCryptoIV, 'dciv', defaultConstantIv->data(), defaultConstantIv->size());
}
break;
}
case FOURCC('t', 'k', 'h', 'd'):
{
*offset += chunk_size;
status_t err;
if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) {
return err;
}
break;
}
case FOURCC('t', 'r', 'e', 'f'):
{
off64_t stop_offset = *offset + chunk_size;
*offset = data_offset;
while (*offset < stop_offset) {
status_t err = parseChunk(offset, depth + 1);
if (err != OK) {
return err;
}
}
if (*offset != stop_offset) {
return ERROR_MALFORMED;
}
break;
}
case FOURCC('t', 'h', 'm', 'b'):
{
*offset += chunk_size;
if (mLastTrack != NULL) {
// Skip thumbnail track for now since we don't have an
// API to retrieve it yet.
// The thumbnail track can't be accessed by negative index or time,
// because each timed sample has its own corresponding thumbnail
// in the thumbnail track. We'll need a dedicated API to retrieve
// thumbnail at time instead.
mLastTrack->skipTrack = true;
}
break;
}
case FOURCC('p', 's', 's', 'h'):
{
*offset += chunk_size;
PsshInfo pssh;
if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) {
return ERROR_IO;
}
uint32_t psshdatalen = 0;
if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) {
return ERROR_IO;
}
pssh.datalen = ntohl(psshdatalen);
ALOGV("pssh data size: %d", pssh.datalen);
if (chunk_size < 20 || pssh.datalen > chunk_size - 20) {
// pssh data length exceeds size of containing box
return ERROR_MALFORMED;
}
pssh.data = new (std::nothrow) uint8_t[pssh.datalen];
if (pssh.data == NULL) {
return ERROR_MALFORMED;
}
ALOGV("allocated pssh @ %p", pssh.data);
ssize_t requested = (ssize_t) pssh.datalen;
if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) {
delete[] pssh.data;
return ERROR_IO;
}
mPssh.push_back(pssh);
break;
}
case FOURCC('m', 'd', 'h', 'd'):
{
*offset += chunk_size;
if (chunk_data_size < 4 || mLastTrack == NULL) {
return ERROR_MALFORMED;
}
uint8_t version;
if (mDataSource->readAt(
data_offset, &version, sizeof(version))
< (ssize_t)sizeof(version)) {
return ERROR_IO;
}
off64_t timescale_offset;
if (version == 1) {
timescale_offset = data_offset + 4 + 16;
} else if (version == 0) {
timescale_offset = data_offset + 4 + 8;
} else {
return ERROR_IO;
}
uint32_t timescale;
if (mDataSource->readAt(
timescale_offset, ×cale, sizeof(timescale))
< (ssize_t)sizeof(timescale)) {
return ERROR_IO;
}
if (!timescale) {
ALOGE("timescale should not be ZERO.");
return ERROR_MALFORMED;
}
mLastTrack->timescale = ntohl(timescale);
// 14496-12 says all ones means indeterminate, but some files seem to use
// 0 instead. We treat both the same.
int64_t duration = 0;
if (version == 1) {
if (mDataSource->readAt(
timescale_offset + 4, &duration, sizeof(duration))
< (ssize_t)sizeof(duration)) {
return ERROR_IO;
}
if (duration != -1) {
duration = ntoh64(duration);
}
} else {
uint32_t duration32;
if (mDataSource->readAt(
timescale_offset + 4, &duration32, sizeof(duration32))
< (ssize_t)sizeof(duration32)) {
return ERROR_IO;
}
if (duration32 != 0xffffffff) {
duration = ntohl(duration32);
}
}
if (duration != 0 && mLastTrack->timescale != 0) {
mLastTrack->meta.setInt64(
kKeyDuration, (duration * 1000000) / mLastTrack->timescale);
}
uint8_t lang[2];
off64_t lang_offset;
if (version == 1) {
lang_offset = timescale_offset + 4 + 8;
} else if (version == 0) {
lang_offset = timescale_offset + 4 + 4;
} else {
return ERROR_IO;
}
if (mDataSource->readAt(lang_offset, &lang, sizeof(lang))
< (ssize_t)sizeof(lang)) {
return ERROR_IO;
}
// To get the ISO-639-2/T three character language code
// 1 bit pad followed by 3 5-bits characters. Each character
// is packed as the difference between its ASCII value and 0x60.
char lang_code[4];
lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60;
lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60;
lang_code[2] = (lang[1] & 0x1f) + 0x60;
lang_code[3] = '\0';
mLastTrack->meta.setCString(
kKeyMediaLanguage, lang_code);
break;
}
case FOURCC('s', 't', 's', 'd'):
{
uint8_t buffer[8];
if (chunk_data_size < (off64_t)sizeof(buffer)) {
return ERROR_MALFORMED;
}
if (mDataSource->readAt(
data_offset, buffer, 8) < 8) {
return ERROR_IO;
}
if (U32_AT(buffer) != 0) {
// Should be version 0, flags 0.
return ERROR_MALFORMED;
}
uint32_t entry_count = U32_AT(&buffer[4]);
if (entry_count > 1) {
// For 3GPP timed text, there could be multiple tx3g boxes containing
// multiple text display formats. These formats will be used to
// display the timed text.
// For encrypted files, there may also be more than one entry.
const char *mime;
if (mLastTrack == NULL)
return ERROR_MALFORMED;
CHECK(mLastTrack->meta.findCString(kKeyMIMEType, &mime));
if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) &&
strcasecmp(mime, "application/octet-stream")) {
// For now we only support a single type of media per track.
mLastTrack->skipTrack = true;
*offset += chunk_size;
break;
}
}
off64_t stop_offset = *offset + chunk_size;
*offset = data_offset + 8;
for (uint32_t i = 0; i < entry_count; ++i) {
status_t err = parseChunk(offset, depth + 1);
if (err != OK) {
return err;
}
}
if (*offset != stop_offset) {
return ERROR_MALFORMED;
}
break;
}
case FOURCC('m', 'e', 't', 't'):
{
*offset += chunk_size;
if (mLastTrack == NULL)
return ERROR_MALFORMED;
auto buffer = heapbuffer<uint8_t>(chunk_data_size);
if (buffer.get() == NULL) {
return NO_MEMORY;
}
if (mDataSource->readAt(
data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
return ERROR_IO;
}
String8 mimeFormat((const char *)(buffer.get()), chunk_data_size);
mLastTrack->meta.setCString(kKeyMIMEType, mimeFormat.string());
break;
}
case FOURCC('m', 'p', '4', 'a'):
case FOURCC('e', 'n', 'c', 'a'):
case FOURCC('s', 'a', 'm', 'r'):
case FOURCC('s', 'a', 'w', 'b'):
{
if (mIsQT && chunk_type == FOURCC('m', 'p', '4', 'a')
&& depth >= 1 && mPath[depth - 1] == FOURCC('w', 'a', 'v', 'e')) {
// Ignore mp4a embedded in QT wave atom
*offset += chunk_size;
break;
}
uint8_t buffer[8 + 20];
if (chunk_data_size < (ssize_t)sizeof(buffer)) {
// Basic AudioSampleEntry size.
return ERROR_MALFORMED;
}
if (mDataSource->readAt(
data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
return ERROR_IO;
}
uint16_t data_ref_index __unused = U16_AT(&buffer[6]);
uint16_t version = U16_AT(&buffer[8]);
uint32_t num_channels = U16_AT(&buffer[16]);
uint16_t sample_size = U16_AT(&buffer[18]);
uint32_t sample_rate = U32_AT(&buffer[24]) >> 16;
if (mLastTrack == NULL)
return ERROR_MALFORMED;
off64_t stop_offset = *offset + chunk_size;
*offset = data_offset + sizeof(buffer);
if (mIsQT && chunk_type == FOURCC('m', 'p', '4', 'a')) {
if (version == 1) {
if (mDataSource->readAt(*offset, buffer, 16) < 16) {
return ERROR_IO;
}
#if 0
U32_AT(buffer); // samples per packet
U32_AT(&buffer[4]); // bytes per packet
U32_AT(&buffer[8]); // bytes per frame
U32_AT(&buffer[12]); // bytes per sample
#endif
*offset += 16;
} else if (version == 2) {
uint8_t v2buffer[36];
if (mDataSource->readAt(*offset, v2buffer, 36) < 36) {
return ERROR_IO;
}
#if 0
U32_AT(v2buffer); // size of struct only
sample_rate = (uint32_t)U64_AT(&v2buffer[4]); // audio sample rate
num_channels = U32_AT(&v2buffer[12]); // num audio channels
U32_AT(&v2buffer[16]); // always 0x7f000000
sample_size = (uint16_t)U32_AT(&v2buffer[20]); // const bits per channel
U32_AT(&v2buffer[24]); // format specifc flags
U32_AT(&v2buffer[28]); // const bytes per audio packet
U32_AT(&v2buffer[32]); // const LPCM frames per audio packet
#endif
*offset += 36;
}
}
if (chunk_type != FOURCC('e', 'n', 'c', 'a')) {
// if the chunk type is enca, we'll get the type from the frma box later
mLastTrack->meta.setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate);
}
ALOGV("*** coding='%s' %d channels, size %d, rate %d\n",
chunk, num_channels, sample_size, sample_rate);
mLastTrack->meta.setInt32(kKeyChannelCount, num_channels);
mLastTrack->meta.setInt32(kKeySampleRate, sample_rate);
while (*offset < stop_offset) {
status_t err = parseChunk(offset, depth + 1);
if (err != OK) {
return err;
}
}
if (*offset != stop_offset) {
return ERROR_MALFORMED;
}
break;
}
case FOURCC('m', 'p', '4', 'v'):
case FOURCC('e', 'n', 'c', 'v'):
case FOURCC('s', '2', '6', '3'):
case FOURCC('H', '2', '6', '3'):
case FOURCC('h', '2', '6', '3'):
case FOURCC('a', 'v', 'c', '1'):
case FOURCC('h', 'v', 'c', '1'):
case FOURCC('h', 'e', 'v', '1'):
{
uint8_t buffer[78];
if (chunk_data_size < (ssize_t)sizeof(buffer)) {
// Basic VideoSampleEntry size.
return ERROR_MALFORMED;
}
if (mDataSource->readAt(
data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
return ERROR_IO;
}
uint16_t data_ref_index __unused = U16_AT(&buffer[6]);
uint16_t width = U16_AT(&buffer[6 + 18]);
uint16_t height = U16_AT(&buffer[6 + 20]);
// The video sample is not standard-compliant if it has invalid dimension.
// Use some default width and height value, and
// let the decoder figure out the actual width and height (and thus
// be prepared for an INFO_FORMAT_CHANGED event).
if (width == 0) width = 352;
if (height == 0) height = 288;
// printf("*** coding='%s' width=%d height=%d\n",
// chunk, width, height);
if (mLastTrack == NULL)
return ERROR_MALFORMED;
if (chunk_type != FOURCC('e', 'n', 'c', 'v')) {
// if the chunk type is encv, we'll get the type from the frma box later
mLastTrack->meta.setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
}
mLastTrack->meta.setInt32(kKeyWidth, width);
mLastTrack->meta.setInt32(kKeyHeight, height);
off64_t stop_offset = *offset + chunk_size;
*offset = data_offset + sizeof(buffer);
while (*offset < stop_offset) {
status_t err = parseChunk(offset, depth + 1);
if (err != OK) {
return err;
}
}
if (*offset != stop_offset) {
return ERROR_MALFORMED;
}
break;
}
case FOURCC('s', 't', 'c', 'o'):
case FOURCC('c', 'o', '6', '4'):
{
if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) {
return ERROR_MALFORMED;
}
status_t err =
mLastTrack->sampleTable->setChunkOffsetParams(
chunk_type, data_offset, chunk_data_size);
*offset += chunk_size;
if (err != OK) {
return err;
}
break;
}
case FOURCC('s', 't', 's', 'c'):
{
if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
return ERROR_MALFORMED;
status_t err =
mLastTrack->sampleTable->setSampleToChunkParams(
data_offset, chunk_data_size);
*offset += chunk_size;
if (err != OK) {
return err;
}
break;
}
case FOURCC('s', 't', 's', 'z'):
case FOURCC('s', 't', 'z', '2'):
{
if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) {
return ERROR_MALFORMED;
}
status_t err =
mLastTrack->sampleTable->setSampleSizeParams(
chunk_type, data_offset, chunk_data_size);
*offset += chunk_size;
if (err != OK) {
return err;
}
size_t max_size;
err = mLastTrack->sampleTable->getMaxSampleSize(&max_size);
if (err != OK) {
return err;
}
if (max_size != 0) {
// Assume that a given buffer only contains at most 10 chunks,
// each chunk originally prefixed with a 2 byte length will
// have a 4 byte header (0x00 0x00 0x00 0x01) after conversion,
// and thus will grow by 2 bytes per chunk.
if (max_size > SIZE_MAX - 10 * 2) {
ALOGE("max sample size too big: %zu", max_size);
return ERROR_MALFORMED;
}
mLastTrack->meta.setInt32(kKeyMaxInputSize, max_size + 10 * 2);
} else {
// No size was specified. Pick a conservatively large size.
uint32_t width, height;
if (!mLastTrack->meta.findInt32(kKeyWidth, (int32_t*)&width) ||
!mLastTrack->meta.findInt32(kKeyHeight,(int32_t*) &height)) {
ALOGE("No width or height, assuming worst case 1080p");
width = 1920;
height = 1080;
} else {
// A resolution was specified, check that it's not too big. The values below
// were chosen so that the calculations below don't cause overflows, they're
// not indicating that resolutions up to 32kx32k are actually supported.
if (width > 32768 || height > 32768) {
ALOGE("can't support %u x %u video", width, height);
return ERROR_MALFORMED;
}
}
const char *mime;
CHECK(mLastTrack->meta.findCString(kKeyMIMEType, &mime));
if (!strncmp(mime, "audio/", 6)) {
// for audio, use 128KB
max_size = 1024 * 128;
} else if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)
|| !strcmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
// AVC & HEVC requires compression ratio of at least 2, and uses
// macroblocks
max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192;
} else {
// For all other formats there is no minimum compression
// ratio. Use compression ratio of 1.
max_size = width * height * 3 / 2;
}
// HACK: allow 10% overhead
// TODO: read sample size from traf atom for fragmented MPEG4.
max_size += max_size / 10;
mLastTrack->meta.setInt32(kKeyMaxInputSize, max_size);
}
// NOTE: setting another piece of metadata invalidates any pointers (such as the
// mimetype) previously obtained, so don't cache them.
const char *mime;
CHECK(mLastTrack->meta.findCString(kKeyMIMEType, &mime));
// Calculate average frame rate.
if (!strncasecmp("video/", mime, 6)) {
size_t nSamples = mLastTrack->sampleTable->countSamples();
if (nSamples == 0) {
int32_t trackId;
if (mLastTrack->meta.findInt32(kKeyTrackID, &trackId)) {
for (size_t i = 0; i < mTrex.size(); i++) {
Trex *t = &mTrex.editItemAt(i);
if (t->track_ID == (uint32_t) trackId) {
if (t->default_sample_duration > 0) {
int32_t frameRate =
mLastTrack->timescale / t->default_sample_duration;
mLastTrack->meta.setInt32(kKeyFrameRate, frameRate);
}
break;
}
}
}
} else {
int64_t durationUs;
if (mLastTrack->meta.findInt64(kKeyDuration, &durationUs)) {
if (durationUs > 0) {
int32_t frameRate = (nSamples * 1000000LL +
(durationUs >> 1)) / durationUs;
mLastTrack->meta.setInt32(kKeyFrameRate, frameRate);
}
}
ALOGV("setting frame count %zu", nSamples);
mLastTrack->meta.setInt32(kKeyFrameCount, nSamples);
}
}
break;
}
case FOURCC('s', 't', 't', 's'):
{
if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
return ERROR_MALFORMED;
*offset += chunk_size;
status_t err =
mLastTrack->sampleTable->setTimeToSampleParams(
data_offset, chunk_data_size);
if (err != OK) {
return err;
}
break;
}
case FOURCC('c', 't', 't', 's'):
{
if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
return ERROR_MALFORMED;
*offset += chunk_size;
status_t err =
mLastTrack->sampleTable->setCompositionTimeToSampleParams(
data_offset, chunk_data_size);
if (err != OK) {
return err;
}
break;
}
case FOURCC('s', 't', 's', 's'):
{
if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
return ERROR_MALFORMED;
*offset += chunk_size;
status_t err =
mLastTrack->sampleTable->setSyncSampleParams(
data_offset, chunk_data_size);
if (err != OK) {
return err;
}
break;
}
// \xA9xyz
case FOURCC(0xA9, 'x', 'y', 'z'):
{
*offset += chunk_size;
// Best case the total data length inside "\xA9xyz" box would
// be 9, for instance "\xA9xyz" + "\x00\x05\x15\xc7" + "+0+0/",
// where "\x00\x05" is the text string length with value = 5,
// "\x15\xc7" is the language code = en, and "+0+0/" is a
// location (string) value with longitude = 0 and latitude = 0.
// Since some devices encountered in the wild omit the trailing
// slash, we'll allow that.
if (chunk_data_size < 8) { // 8 instead of 9 to allow for missing /
return ERROR_MALFORMED;
}
uint16_t len;
if (!mDataSource->getUInt16(data_offset, &len)) {
return ERROR_IO;
}
// allow "+0+0" without trailing slash
if (len < 4 || len > chunk_data_size - 4) {
return ERROR_MALFORMED;
}
// The location string following the language code is formatted
// according to ISO 6709:2008 (https://en.wikipedia.org/wiki/ISO_6709).
// Allocate 2 extra bytes, in case we need to add a trailing slash,
// and to add a terminating 0.
std::unique_ptr<char[]> buffer(new (std::nothrow) char[len+2]());
if (!buffer) {
return NO_MEMORY;
}
if (mDataSource->readAt(
data_offset + 4, &buffer[0], len) < len) {
return ERROR_IO;
}
len = strlen(&buffer[0]);
if (len < 4) {
return ERROR_MALFORMED;
}
// Add a trailing slash if there wasn't one.
if (buffer[len - 1] != '/') {
buffer[len] = '/';
}
mFileMetaData.setCString(kKeyLocation, &buffer[0]);
break;
}
case FOURCC('e', 's', 'd', 's'):
{
*offset += chunk_size;
if (chunk_data_size < 4) {
return ERROR_MALFORMED;
}
uint8_t buffer[256];
if (chunk_data_size > (off64_t)sizeof(buffer)) {
return ERROR_BUFFER_TOO_SMALL;
}
if (mDataSource->readAt(
data_offset, buffer, chunk_data_size) < chunk_data_size) {
return ERROR_IO;
}
if (U32_AT(buffer) != 0) {
// Should be version 0, flags 0.
return ERROR_MALFORMED;
}
if (mLastTrack == NULL)
return ERROR_MALFORMED;
mLastTrack->meta.setData(
kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4);
if (mPath.size() >= 2
&& mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) {
// Information from the ESDS must be relied on for proper
// setup of sample rate and channel count for MPEG4 Audio.
// The generic header appears to only contain generic
// information...
status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio(
&buffer[4], chunk_data_size - 4);
if (err != OK) {
return err;
}
}
if (mPath.size() >= 2
&& mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'v')) {
// Check if the video is MPEG2
ESDS esds(&buffer[4], chunk_data_size - 4);
uint8_t objectTypeIndication;
if (esds.getObjectTypeIndication(&objectTypeIndication) == OK) {
if (objectTypeIndication >= 0x60 && objectTypeIndication <= 0x65) {
mLastTrack->meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_MPEG2);
}
}
}
break;
}
case FOURCC('b', 't', 'r', 't'):
{
*offset += chunk_size;
if (mLastTrack == NULL) {
return ERROR_MALFORMED;
}
uint8_t buffer[12];
if (chunk_data_size != sizeof(buffer)) {
return ERROR_MALFORMED;
}
if (mDataSource->readAt(
data_offset, buffer, chunk_data_size) < chunk_data_size) {
return ERROR_IO;
}
uint32_t maxBitrate = U32_AT(&buffer[4]);
uint32_t avgBitrate = U32_AT(&buffer[8]);
if (maxBitrate > 0 && maxBitrate < INT32_MAX) {
mLastTrack->meta.setInt32(kKeyMaxBitRate, (int32_t)maxBitrate);
}
if (avgBitrate > 0 && avgBitrate < INT32_MAX) {
mLastTrack->meta.setInt32(kKeyBitRate, (int32_t)avgBitrate);
}
break;
}
case FOURCC('a', 'v', 'c', 'C'):
{
*offset += chunk_size;
auto buffer = heapbuffer<uint8_t>(chunk_data_size);
if (buffer.get() == NULL) {
ALOGE("b/28471206");
return NO_MEMORY;
}
if (mDataSource->readAt(
data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
return ERROR_IO;
}
if (mLastTrack == NULL)
return ERROR_MALFORMED;
mLastTrack->meta.setData(
kKeyAVCC, kTypeAVCC, buffer.get(), chunk_data_size);
break;
}
case FOURCC('h', 'v', 'c', 'C'):
{
auto buffer = heapbuffer<uint8_t>(chunk_data_size);
if (buffer.get() == NULL) {
ALOGE("b/28471206");
return NO_MEMORY;
}
if (mDataSource->readAt(
data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
return ERROR_IO;
}
if (mLastTrack == NULL)
return ERROR_MALFORMED;
mLastTrack->meta.setData(
kKeyHVCC, kTypeHVCC, buffer.get(), chunk_data_size);
*offset += chunk_size;
break;
}
case FOURCC('d', '2', '6', '3'):
{
*offset += chunk_size;
/*
* d263 contains a fixed 7 bytes part:
* vendor - 4 bytes
* version - 1 byte
* level - 1 byte
* profile - 1 byte
* optionally, "d263" box itself may contain a 16-byte
* bit rate box (bitr)
* average bit rate - 4 bytes
* max bit rate - 4 bytes
*/
char buffer[23];
if (chunk_data_size != 7 &&
chunk_data_size != 23) {
ALOGE("Incorrect D263 box size %lld", (long long)chunk_data_size);
return ERROR_MALFORMED;
}
if (mDataSource->readAt(
data_offset, buffer, chunk_data_size) < chunk_data_size) {
return ERROR_IO;
}
if (mLastTrack == NULL)
return ERROR_MALFORMED;
mLastTrack->meta.setData(kKeyD263, kTypeD263, buffer, chunk_data_size);
break;
}
case FOURCC('m', 'e', 't', 'a'):
{
off64_t stop_offset = *offset + chunk_size;
*offset = data_offset;
bool isParsingMetaKeys = underQTMetaPath(mPath, 2);
if (!isParsingMetaKeys) {
uint8_t buffer[4];
if (chunk_data_size < (off64_t)sizeof(buffer)) {
*offset = stop_offset;
return ERROR_MALFORMED;
}
if (mDataSource->readAt(
data_offset, buffer, 4) < 4) {
*offset = stop_offset;
return ERROR_IO;
}
if (U32_AT(buffer) != 0) {
// Should be version 0, flags 0.
// If it's not, let's assume this is one of those
// apparently malformed chunks that don't have flags
// and completely different semantics than what's
// in the MPEG4 specs and skip it.
*offset = stop_offset;
return OK;
}
*offset += sizeof(buffer);
}
while (*offset < stop_offset) {
status_t err = parseChunk(offset, depth + 1);
if (err != OK) {
return err;
}
}
if (*offset != stop_offset) {
return ERROR_MALFORMED;
}
break;
}
case FOURCC('i', 'l', 'o', 'c'):
case FOURCC('i', 'i', 'n', 'f'):
case FOURCC('i', 'p', 'r', 'p'):
case FOURCC('p', 'i', 't', 'm'):
case FOURCC('i', 'd', 'a', 't'):
case FOURCC('i', 'r', 'e', 'f'):
case FOURCC('i', 'p', 'r', 'o'):
{
if (mIsHeif) {
if (mItemTable == NULL) {
mItemTable = new ItemTable(mDataSource);
}
status_t err = mItemTable->parse(
chunk_type, data_offset, chunk_data_size);
if (err != OK) {
return err;
}
}
*offset += chunk_size;
break;
}
case FOURCC('m', 'e', 'a', 'n'):
case FOURCC('n', 'a', 'm', 'e'):
case FOURCC('d', 'a', 't', 'a'):
{
*offset += chunk_size;
if (mPath.size() == 6 && underMetaDataPath(mPath)) {
status_t err = parseITunesMetaData(data_offset, chunk_data_size);
if (err != OK) {
return err;
}
}
break;
}
case FOURCC('m', 'v', 'h', 'd'):
{
*offset += chunk_size;
if (depth != 1) {
ALOGE("mvhd: depth %d", depth);
return ERROR_MALFORMED;
}
if (chunk_data_size < 32) {
return ERROR_MALFORMED;
}
uint8_t header[32];
if (mDataSource->readAt(
data_offset, header, sizeof(header))
< (ssize_t)sizeof(header)) {
return ERROR_IO;
}
uint64_t creationTime;
uint64_t duration = 0;
if (header[0] == 1) {
creationTime = U64_AT(&header[4]);
mHeaderTimescale = U32_AT(&header[20]);
duration = U64_AT(&header[24]);
if (duration == 0xffffffffffffffff) {
duration = 0;
}
} else if (header[0] != 0) {
return ERROR_MALFORMED;
} else {
creationTime = U32_AT(&header[4]);
mHeaderTimescale = U32_AT(&header[12]);
uint32_t d32 = U32_AT(&header[16]);
if (d32 == 0xffffffff) {
d32 = 0;
}
duration = d32;
}
if (duration != 0 && mHeaderTimescale != 0 && duration < UINT64_MAX / 1000000) {
mFileMetaData.setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale);
}
String8 s;
if (convertTimeToDate(creationTime, &s)) {
mFileMetaData.setCString(kKeyDate, s.string());
}
break;
}
case FOURCC('m', 'e', 'h', 'd'):
{
*offset += chunk_size;
if (chunk_data_size < 8) {
return ERROR_MALFORMED;
}
uint8_t flags[4];
if (mDataSource->readAt(
data_offset, flags, sizeof(flags))
< (ssize_t)sizeof(flags)) {
return ERROR_IO;
}
uint64_t duration = 0;
if (flags[0] == 1) {
// 64 bit
if (chunk_data_size < 12) {
return ERROR_MALFORMED;
}
mDataSource->getUInt64(data_offset + 4, &duration);
if (duration == 0xffffffffffffffff) {
duration = 0;
}
} else if (flags[0] == 0) {
// 32 bit
uint32_t d32;
mDataSource->getUInt32(data_offset + 4, &d32);
if (d32 == 0xffffffff) {
d32 = 0;
}
duration = d32;
} else {
return ERROR_MALFORMED;
}
if (duration != 0 && mHeaderTimescale != 0) {
mFileMetaData.setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale);
}
break;
}
case FOURCC('m', 'd', 'a', 't'):
{
mMdatFound = true;
*offset += chunk_size;
break;
}
case FOURCC('h', 'd', 'l', 'r'):
{
*offset += chunk_size;
if (underQTMetaPath(mPath, 3)) {
break;
}
uint32_t buffer;
if (mDataSource->readAt(
data_offset + 8, &buffer, 4) < 4) {
return ERROR_IO;
}
uint32_t type = ntohl(buffer);
// For the 3GPP file format, the handler-type within the 'hdlr' box
// shall be 'text'. We also want to support 'sbtl' handler type
// for a practical reason as various MPEG4 containers use it.
if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) {
if (mLastTrack != NULL) {
mLastTrack->meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP);
}
}
break;
}
case FOURCC('k', 'e', 'y', 's'):
{
*offset += chunk_size;
if (underQTMetaPath(mPath, 3)) {
status_t err = parseQTMetaKey(data_offset, chunk_data_size);
if (err != OK) {
return err;
}
}
break;
}
case FOURCC('t', 'r', 'e', 'x'):
{
*offset += chunk_size;
if (chunk_data_size < 24) {
return ERROR_IO;
}
Trex trex;
if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) ||
!mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) ||
!mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) ||
!mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) ||
!mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) {
return ERROR_IO;
}
mTrex.add(trex);
break;
}
case FOURCC('t', 'x', '3', 'g'):
{
if (mLastTrack == NULL)
return ERROR_MALFORMED;
uint32_t type;
const void *data;
size_t size = 0;
if (!mLastTrack->meta.findData(
kKeyTextFormatData, &type, &data, &size)) {
size = 0;
}
if ((chunk_size > SIZE_MAX) || (SIZE_MAX - chunk_size <= size)) {
return ERROR_MALFORMED;
}
uint8_t *buffer = new (std::nothrow) uint8_t[size + chunk_size];
if (buffer == NULL) {
return ERROR_MALFORMED;
}
if (size > 0) {
memcpy(buffer, data, size);
}
if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size))
< chunk_size) {
delete[] buffer;
buffer = NULL;
// advance read pointer so we don't end up reading this again
*offset += chunk_size;
return ERROR_IO;
}
mLastTrack->meta.setData(
kKeyTextFormatData, 0, buffer, size + chunk_size);
delete[] buffer;
*offset += chunk_size;
break;
}
case FOURCC('c', 'o', 'v', 'r'):
{
*offset += chunk_size;
ALOGV("chunk_data_size = %" PRId64 " and data_offset = %" PRId64,
chunk_data_size, data_offset);
if (chunk_data_size < 0 || static_cast<uint64_t>(chunk_data_size) >= SIZE_MAX - 1) {
return ERROR_MALFORMED;
}
auto buffer = heapbuffer<uint8_t>(chunk_data_size);
if (buffer.get() == NULL) {
ALOGE("b/28471206");
return NO_MEMORY;
}
if (mDataSource->readAt(
data_offset, buffer.get(), chunk_data_size) != (ssize_t)chunk_data_size) {
return ERROR_IO;
}
const int kSkipBytesOfDataBox = 16;
if (chunk_data_size <= kSkipBytesOfDataBox) {
return ERROR_MALFORMED;
}
mFileMetaData.setData(
kKeyAlbumArt, MetaData::TYPE_NONE,
buffer.get() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox);
break;
}
case FOURCC('c', 'o', 'l', 'r'):
{
*offset += chunk_size;
// this must be in a VisualSampleEntry box under the Sample Description Box ('stsd')
// ignore otherwise
if (depth >= 2 && mPath[depth - 2] == FOURCC('s', 't', 's', 'd')) {
status_t err = parseColorInfo(data_offset, chunk_data_size);
if (err != OK) {
return err;
}
}
break;
}
case FOURCC('t', 'i', 't', 'l'):
case FOURCC('p', 'e', 'r', 'f'):
case FOURCC('a', 'u', 't', 'h'):
case FOURCC('g', 'n', 'r', 'e'):
case FOURCC('a', 'l', 'b', 'm'):
case FOURCC('y', 'r', 'r', 'c'):
{
*offset += chunk_size;
status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth);
if (err != OK) {
return err;
}
break;
}
case FOURCC('I', 'D', '3', '2'):
{
*offset += chunk_size;
if (chunk_data_size < 6) {
return ERROR_MALFORMED;
}
parseID3v2MetaData(data_offset + 6);
break;
}
case FOURCC('-', '-', '-', '-'):
{
mLastCommentMean.clear();
mLastCommentName.clear();
mLastCommentData.clear();
*offset += chunk_size;
break;
}
case FOURCC('s', 'i', 'd', 'x'):
{
status_t err = parseSegmentIndex(data_offset, chunk_data_size);
if (err != OK) {
return err;
}
*offset += chunk_size;
return UNKNOWN_ERROR; // stop parsing after sidx
}
case FOURCC('a', 'c', '-', '3'):
{
*offset += chunk_size;
return parseAC3SampleEntry(data_offset);
}
case FOURCC('f', 't', 'y', 'p'):
{
if (chunk_data_size < 8 || depth != 0) {
return ERROR_MALFORMED;
}
off64_t stop_offset = *offset + chunk_size;
uint32_t numCompatibleBrands = (chunk_data_size - 8) / 4;
std::set<uint32_t> brandSet;
for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
if (i == 1) {
// Skip this index, it refers to the minorVersion,
// not a brand.
continue;
}
uint32_t brand;
if (mDataSource->readAt(data_offset + 4 * i, &brand, 4) < 4) {
return ERROR_MALFORMED;
}
brand = ntohl(brand);
brandSet.insert(brand);
}
if (brandSet.count(FOURCC('q', 't', ' ', ' ')) > 0) {
mIsQT = true;
} else {
if (brandSet.count(FOURCC('m', 'i', 'f', '1')) > 0
&& brandSet.count(FOURCC('h', 'e', 'i', 'c')) > 0) {
ALOGV("identified HEIF image");
mIsHeif = true;
brandSet.erase(FOURCC('m', 'i', 'f', '1'));
brandSet.erase(FOURCC('h', 'e', 'i', 'c'));
}
if (!brandSet.empty()) {
// This means that the file should have moov box.
// It could be any iso files (mp4, heifs, etc.)
mHasMoovBox = true;
if (mIsHeif) {
ALOGV("identified HEIF image with other tracks");
}
}
}
*offset = stop_offset;
break;
}
default:
{
// check if we're parsing 'ilst' for meta keys
// if so, treat type as a number (key-id).
if (underQTMetaPath(mPath, 3)) {
status_t err = parseQTMetaVal(chunk_type, data_offset, chunk_data_size);
if (err != OK) {
return err;
}
}
*offset += chunk_size;
break;
}
}
return OK;
}
// Walks over the fixed part of an 'ac-3' sample entry and then hands the box
// that follows it to parseAC3SpecificBox().
//
// |offset| is the position of the sample entry body (the caller in
// parseChunk passes the chunk's data offset). Two 16-bit fields are read from
// the data source; if either read fails ERROR_MALFORMED is returned.
// Otherwise the result of parseAC3SpecificBox() is returned unchanged.
status_t MPEG4Extractor::parseAC3SampleEntry(off64_t offset) {
    // Distances between the fields touched below.
    //   6-byte reserved + 2-byte data reference index + 8-byte reserved
    const off64_t kBytesBeforeChannelCount = 16;
    //   2-byte channelCount + 2-byte sample size + 4-byte reserved
    const off64_t kChannelCountToSampleRate = 8;
    //   2-byte sampleRate + 2-byte reserved
    const off64_t kSampleRateToSpecificBox = 4;

    const off64_t channelCountPos = offset + kBytesBeforeChannelCount;
    const off64_t sampleRatePos = channelCountPos + kChannelCountToSampleRate;
    const off64_t specificBoxPos = sampleRatePos + kSampleRateToSpecificBox;

    // The channel count value is not used afterwards; only the success of the
    // read matters here.
    uint16_t entryChannelCount;
    if (!mDataSource->getUInt16(channelCountPos, &entryChannelCount)) {
        return ERROR_MALFORMED;
    }

    uint16_t entrySampleRate;
    if (!mDataSource->getUInt16(sampleRatePos, &entrySampleRate)) {
        ALOGE("MPEG4Extractor: error while reading ac-3 block: cannot read sample rate");
        return ERROR_MALFORMED;
    }

    return parseAC3SpecificBox(specificBoxPos, entrySampleRate);
}
// Reads the 'dac3' box located at |offset| and publishes the stream
// parameters on the track that is currently being built (mLastTrack).
//
// |sampleRate| is the rate the caller read from the enclosing sample entry;
// it has to match the rate selected by the box's fscod field.
//
// On success the track's MIME type, channel count and sample rate are set and
// OK is returned. Every failure path (short read, box size below 11, type
// other than 'dac3', fscod == 3, sample rate mismatch, bsid above 8, no
// current track) returns ERROR_MALFORMED.
status_t MPEG4Extractor::parseAC3SpecificBox(
        off64_t offset, uint16_t sampleRate) {
    // Box layout: 4-byte size + 4-byte type + 3-byte payload.
    const uint32_t kMinBoxSize = 11;
    const uint32_t kPayloadSize = 3;

    uint32_t boxSize;
    const bool haveSize = mDataSource->getUInt32(offset, &boxSize);
    if (!haveSize || boxSize < kMinBoxSize) {
        ALOGE("MPEG4Extractor: error while reading ac-3 block: cannot read specific box size");
        return ERROR_MALFORMED;
    }
    const off64_t typePos = offset + 4;

    uint32_t boxType;
    const bool haveType = mDataSource->getUInt32(typePos, &boxType);
    if (!haveType || boxType != FOURCC('d', 'a', 'c', '3')) {
        ALOGE("MPEG4Extractor: error while reading ac-3 specific block: header not dac3");
        return ERROR_MALFORMED;
    }
    const off64_t payloadPos = typePos + 4;

    uint8_t payload[kPayloadSize];
    if (mDataSource->readAt(payloadPos, payload, sizeof(payload)) != sizeof(payload)) {
        ALOGE("MPEG4Extractor: error while reading ac-3 specific block: bitstream fields");
        return ERROR_MALFORMED;
    }

    // Channel count selected by the 3-bit acmod field, before lfeon is added.
    static const unsigned kChannelsForAcmod[] = {2, 1, 2, 3, 3, 4, 4, 5};
    // Sample rate selected by the 2-bit fscod field; value 3 has no entry and
    // is rejected below.
    static const unsigned kRateForFscod[] = {48000, 44100, 32000};

    // Fields are consumed in bitstream order: fscod(2), bsid(5), bsmod(3),
    // acmod(3), lfeon(1).
    ABitReader bits(payload, sizeof(payload));

    const unsigned fscod = bits.getBits(2);
    if (fscod == 3) {
        ALOGE("Incorrect fscod (3) in AC3 header");
        return ERROR_MALFORMED;
    }

    const unsigned rateFromBox = kRateForFscod[fscod];
    if (rateFromBox != sampleRate) {
        ALOGE("sample rate mismatch: boxSampleRate = %d, sampleRate = %d",
            rateFromBox, sampleRate);
        return ERROR_MALFORMED;
    }

    const unsigned bsid = bits.getBits(5);
    if (bsid > 8) {
        ALOGW("Incorrect bsid in AC3 header. Possibly E-AC-3?");
        return ERROR_MALFORMED;
    }

    // bsmod is read only to advance the reader; its value is not needed.
    (void)bits.getBits(3);
    const unsigned acmod = bits.getBits(3);
    const unsigned lfeon = bits.getBits(1);
    const unsigned totalChannels = kChannelsForAcmod[acmod] + lfeon;

    if (mLastTrack == NULL) {
        return ERROR_MALFORMED;
    }

    mLastTrack->meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_AC3);
    mLastTrack->meta.setInt32(kKeyChannelCount, totalChannels);
    mLastTrack->meta.setInt32(kKeySampleRate, sampleRate);
    return OK;
}
// Parses the payload of a Segment Index ('sidx') box.
//
// |offset| and |size| describe the payload, which starts with the 32-bit
// version/flags word. For every reference listed in the box one SidxEntry
// (size and duration in microseconds) is appended to mSidxEntries. If the
// current track has no duration yet (key missing or zero), the summed
// duration of all references is stored as the track duration.
//
// Returns OK on success, -EINVAL when |size| is too small for the fields that
// have to be present, and ERROR_MALFORMED for read failures, a zero
// timescale, a total duration that does not fit a signed 64-bit microsecond
// count, or when there is no current track. Entries that were appended to
// mSidxEntries before one of the late checks fails are left in place.
status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) {
    ALOGV("MPEG4Extractor::parseSegmentIndex");

    // version/flags (4) + reference id (4) + timescale (4)
    if (size < 12) {
        return -EINVAL;
    }

    uint32_t flags;
    if (!mDataSource->getUInt32(offset, &flags)) {
        return ERROR_MALFORMED;
    }

    // Top byte is the version, the low 24 bits are the flags.
    uint32_t version = flags >> 24;
    flags &= 0xffffff;

    ALOGV("sidx version %d", version);

    uint32_t referenceId;
    if (!mDataSource->getUInt32(offset + 4, &referenceId)) {
        return ERROR_MALFORMED;
    }

    uint32_t timeScale;
    if (!mDataSource->getUInt32(offset + 8, &timeScale)) {
        return ERROR_MALFORMED;
    }
    ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale);
    // timeScale is used as a divisor further down.
    if (timeScale == 0)
        return ERROR_MALFORMED;

    uint64_t earliestPresentationTime;
    uint64_t firstOffset;

    offset += 12;
    size -= 12;

    // Version 0 stores both values as 32-bit fields, any other version as
    // 64-bit fields.
    if (version == 0) {
        if (size < 8) {
            return -EINVAL;
        }
        uint32_t tmp;
        if (!mDataSource->getUInt32(offset, &tmp)) {
            return ERROR_MALFORMED;
        }
        earliestPresentationTime = tmp;
        if (!mDataSource->getUInt32(offset + 4, &tmp)) {
            return ERROR_MALFORMED;
        }
        firstOffset = tmp;
        offset += 8;
        size -= 8;
    } else {
        if (size < 16) {
            return -EINVAL;
        }
        if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) {
            return ERROR_MALFORMED;
        }
        if (!mDataSource->getUInt64(offset + 8, &firstOffset)) {
            return ERROR_MALFORMED;
        }
        offset += 16;
        size -= 16;
    }
    ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset);

    if (size < 4) {
        return -EINVAL;
    }

    // The first two of these four bytes are skipped; the 16-bit reference
    // count follows them.
    uint16_t referenceCount;
    if (!mDataSource->getUInt16(offset + 2, &referenceCount)) {
        return ERROR_MALFORMED;
    }
    offset += 4;
    size -= 4;
    ALOGV("refcount: %d", referenceCount);

    // Each reference occupies three 32-bit words.
    if (size < referenceCount * 12) {
        return -EINVAL;
    }

    uint64_t total_duration = 0;
    for (unsigned int i = 0; i < referenceCount; i++) {
        uint32_t d1, d2, d3;

        if (!mDataSource->getUInt32(offset, &d1) ||     // size
            !mDataSource->getUInt32(offset + 4, &d2) || // duration
            !mDataSource->getUInt32(offset + 8, &d3)) { // flags
            return ERROR_MALFORMED;
        }

        if (d1 & 0x80000000) {
            ALOGW("sub-sidx boxes not supported yet");
        }
        bool sap = d3 & 0x80000000;
        uint32_t saptype = (d3 >> 28) & 7;
        if (!sap || (saptype != 1 && saptype != 2)) {
            // type 1 and 2 are sync samples
            ALOGW("not a stream access point, or unsupported type: %08x", d3);
        }
        // At most 65535 additions of 32-bit values, so this sum cannot wrap.
        total_duration += d2;
        offset += 12;
        ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3);
        SidxEntry se;
        se.mSize = d1 & 0x7fffffff;
        se.mDurationUs = 1000000LL * d2 / timeScale;
        mSidxEntries.add(se);
    }

    // total_duration * 1000000 can wrap around 64 bits for hostile input.
    // Scale the whole seconds and the sub-second remainder separately: the
    // result equals total_duration * 1000000 / timeScale whenever that
    // expression does not overflow, and values that cannot be stored as a
    // non-negative int64_t are rejected instead of being silently truncated.
    const uint64_t wholeSeconds = total_duration / timeScale;
    const uint64_t remainderTicks = total_duration % timeScale;
    if (wholeSeconds >= (uint64_t)INT64_MAX / 1000000) {
        ALOGE("sidx duration too large: %" PRIu64 " (timescale %u)",
                total_duration, timeScale);
        return ERROR_MALFORMED;
    }
    // remainderTicks < timeScale <= UINT32_MAX, so this product cannot wrap.
    uint64_t sidxDuration =
            wholeSeconds * 1000000 + remainderTicks * 1000000 / timeScale;

    if (mLastTrack == NULL)
        return ERROR_MALFORMED;

    // Only fill in the duration when the track does not already have one.
    int64_t metaDuration;
    if (!mLastTrack->meta.findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) {
        mLastTrack->meta.setInt64(kKeyDuration, (int64_t)sidxDuration);
    }
    return OK;
}
// Parses a QuickTime metadata 'keys' atom payload and rebuilds mMetaKeyMap.
//
// The payload starts at |offset| and is |size| bytes long. The entry count is
// read from offset + 4 and the entries start at offset + 8. Each entry is a
// 32-bit size, the fourcc 'mdta' and the key name; names are stored in
// mMetaKeyMap under their 1-based position. Entries left over from an earlier
// 'keys' atom are discarded first.
//
// Returns ERROR_MALFORMED on any inconsistency, read failure or allocation
// failure, OK otherwise.
status_t MPEG4Extractor::parseQTMetaKey(off64_t offset, size_t size) {
    if (size < 8) {
        return ERROR_MALFORMED;
    }

    uint32_t count;
    if (!mDataSource->getUInt32(offset + 4, &count)) {
        return ERROR_MALFORMED;
    }

    if (mMetaKeyMap.size() > 0) {
        ALOGW("'keys' atom seen again, discarding existing entries");
        mMetaKeyMap.clear();
    }

    const off64_t stopOffset = offset + size;
    off64_t cursor = offset + 8;

    // Keys are numbered starting at 1.
    for (size_t keyIndex = 1; keyIndex <= count; ++keyIndex) {
        // Room for the 8-byte entry header?
        if (cursor + 8 > stopOffset) {
            return ERROR_MALFORMED;
        }

        uint32_t entrySize;
        if (!mDataSource->getUInt32(cursor, &entrySize)) {
            return ERROR_MALFORMED;
        }
        if (entrySize < 8 || cursor + entrySize > stopOffset) {
            return ERROR_MALFORMED;
        }

        uint32_t entryType;
        if (!mDataSource->getUInt32(cursor + 4, &entryType)) {
            return ERROR_MALFORMED;
        }
        if (entryType != FOURCC('m', 'd', 't', 'a')) {
            return ERROR_MALFORMED;
        }

        // What follows the header is the key name itself.
        const uint32_t nameSize = entrySize - 8;
        const off64_t nameOffset = cursor + 8;

        auto nameBytes = heapbuffer<uint8_t>(nameSize);
        if (nameBytes.get() == NULL) {
            return ERROR_MALFORMED;
        }
        if (mDataSource->readAt(nameOffset, nameBytes.get(), nameSize)
                < (ssize_t) nameSize) {
            return ERROR_MALFORMED;
        }

        AString keyName((const char *)nameBytes.get(), nameSize);
        mMetaKeyMap.add(keyIndex, keyName);

        cursor = nameOffset + nameSize;
    }
    return OK;
}
// Parses the value atom belonging to the QuickTime metadata key |keyId|.
//
// |offset| / |size| describe the value payload, which must contain a 'data'
// atom: 32-bit size, fourcc 'data', 32-bit type (top byte must be zero), four
// more bytes, then the value. Two key/type combinations are stored in
// mFileMetaData:
//   - type 23 (big-endian float32), key "com.android.capture.fps"
//       -> kKeyCaptureFramerate
//   - type 67 (big-endian int32), key "com.android.video.temporal_layers_count"
//       -> kKeyTemporalLayerCount
// Everything else is ignored.
//
// Returns ERROR_MALFORMED when |keyId| is not in mMetaKeyMap, when the atom is
// too small or inconsistent, or when a read fails; OK otherwise.
status_t MPEG4Extractor::parseQTMetaVal(
        int32_t keyId, off64_t offset, size_t size) {
    ssize_t index = mMetaKeyMap.indexOfKey(keyId);
    if (index < 0) {
        // No key with this id has been recorded in mMetaKeyMap.
        return ERROR_MALFORMED;
    }

    // A 16-byte 'data' header must be followed by at least one value byte.
    if (size <= 16) {
        return ERROR_MALFORMED;
    }
    uint32_t dataSize;
    if (!mDataSource->getUInt32(offset, &dataSize)
            || dataSize > size || dataSize <= 16) {
        return ERROR_MALFORMED;
    }
    uint32_t atomFourCC;
    if (!mDataSource->getUInt32(offset + 4, &atomFourCC)
            || atomFourCC != FOURCC('d', 'a', 't', 'a')) {
        return ERROR_MALFORMED;
    }
    uint32_t dataType;
    if (!mDataSource->getUInt32(offset + 8, &dataType)
            || ((dataType & 0xff000000) != 0)) {
        // not well-known type
        return ERROR_MALFORMED;
    }

    // Skip the header; dataSize now counts only the value bytes.
    dataSize -= 16;
    offset += 16;

    if (dataType == 23 && dataSize >= 4) {
        // BE Float32
        uint32_t val;
        if (!mDataSource->getUInt32(offset, &val)) {
            return ERROR_MALFORMED;
        }
        if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.capture.fps")) {
            // Reinterpret the 32 bits as a float. memcpy is used rather than
            // a pointer cast because reading a uint32_t object through a
            // float lvalue violates the strict aliasing rules.
            static_assert(sizeof(float) == sizeof(uint32_t),
                    "float must be 32 bits wide");
            float fps;
            memcpy(&fps, &val, sizeof(fps));
            mFileMetaData.setFloat(kKeyCaptureFramerate, fps);
        }
    } else if (dataType == 67 && dataSize >= 4) {
        // BE signed int32
        uint32_t val;
        if (!mDataSource->getUInt32(offset, &val)) {
            return ERROR_MALFORMED;
        }
        if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.video.temporal_layers_count")) {
            mFileMetaData.setInt32(kKeyTemporalLayerCount, val);
        }
    } else {
        // add more keys if needed
        ALOGV("ignoring key: type %u, size %u", dataType, dataSize);
    }
    return OK;
}
// Parses a track header payload of |data_size| bytes at |data_offset| and
// records the track id, rotation and display size on mLastTrack.
//
// The payload must be exactly 36 + 60 bytes for version 1 and 24 + 60 bytes
// otherwise. Only the four exact matrices for 0, 90, 180 and 270 degrees are
// recognised; any other matrix is treated as no rotation.
//
// Returns ERROR_MALFORMED for a bad size or missing track, ERROR_IO on a short
// read, ERROR_UNSUPPORTED for versions other than 0 and 1, OK otherwise.
status_t MPEG4Extractor::parseTrackHeader(
        off64_t data_offset, off64_t data_size) {
    if (data_size < 4) {
        return ERROR_MALFORMED;
    }

    uint8_t version;
    if (mDataSource->readAt(data_offset, &version, 1) < 1) {
        return ERROR_IO;
    }

    // Size of the version dependent leading part of the payload.
    const size_t dynSize = (version == 1) ? 36 : 24;

    uint8_t header[36 + 60];
    if (data_size != (off64_t)dynSize + 60) {
        return ERROR_MALFORMED;
    }
    if (mDataSource->readAt(data_offset, header, data_size) < (ssize_t)data_size) {
        return ERROR_IO;
    }

    uint64_t creationTime __unused, modificationTime __unused, trackDuration __unused;
    int32_t trackId;
    switch (version) {
        case 1:
            creationTime = U64_AT(&header[4]);
            modificationTime = U64_AT(&header[12]);
            trackId = U32_AT(&header[20]);
            trackDuration = U64_AT(&header[28]);
            break;
        case 0:
            creationTime = U32_AT(&header[4]);
            modificationTime = U32_AT(&header[8]);
            trackId = U32_AT(&header[12]);
            trackDuration = U32_AT(&header[20]);
            break;
        default:
            return ERROR_UNSUPPORTED;
    }

    if (mLastTrack == NULL) {
        return ERROR_MALFORMED;
    }
    mLastTrack->meta.setInt32(kKeyTrackID, trackId);

    // The 2x2 part of the transformation matrix, 16 bytes past the
    // version dependent part.
    const size_t matrixOffset = dynSize + 16;
    const int32_t m00 = U32_AT(&header[matrixOffset]);
    const int32_t m01 = U32_AT(&header[matrixOffset + 4]);
    const int32_t m10 = U32_AT(&header[matrixOffset + 12]);
    const int32_t m11 = U32_AT(&header[matrixOffset + 16]);

#if 0
    int32_t dx = U32_AT(&header[matrixOffset + 8]);
    int32_t dy = U32_AT(&header[matrixOffset + 20]);
    ALOGI("x' = %.2f * x + %.2f * y + %.2f",
            m00 / 65536.0f, m01 / 65536.0f, dx / 65536.0f);
    ALOGI("y' = %.2f * x + %.2f * y + %.2f",
            m10 / 65536.0f, m11 / 65536.0f, dy / 65536.0f);
#endif

    static const int32_t kFixedOne = 0x10000;

    // The matrices that are understood, with the rotation each one stands for.
    struct KnownRotation {
        int32_t r00, r01, r10, r11;
        uint32_t degrees;
    };
    static const KnownRotation kKnownRotations[] = {
        { kFixedOne, 0, 0, kFixedOne, 0 },      // Identity, no rotation
        { 0, kFixedOne, -kFixedOne, 0, 90 },
        { 0, -kFixedOne, kFixedOne, 0, 270 },
        { -kFixedOne, 0, 0, -kFixedOne, 180 },
    };

    uint32_t rotationDegrees = 0;
    bool recognised = false;
    for (const KnownRotation &candidate : kKnownRotations) {
        if (m00 == candidate.r00 && m01 == candidate.r01
                && m10 == candidate.r10 && m11 == candidate.r11) {
            rotationDegrees = candidate.degrees;
            recognised = true;
            break;
        }
    }
    if (!recognised) {
        ALOGW("We only support 0,90,180,270 degree rotation matrices");
    }

    if (rotationDegrees != 0) {
        mLastTrack->meta.setInt32(kKeyRotation, rotationDegrees);
    }

    // Handle presentation display size, which could be different
    // from the image size indicated by kKeyWidth and kKeyHeight.
    // Only the upper 16 bits of each value are kept.
    const uint32_t displayWidth = U32_AT(&header[dynSize + 52]);
    const uint32_t displayHeight = U32_AT(&header[dynSize + 56]);
    mLastTrack->meta.setInt32(kKeyDisplayWidth, displayWidth >> 16);
    mLastTrack->meta.setInt32(kKeyDisplayHeight, displayHeight >> 16);

    return OK;
}
// Parses one iTunes-style metadata payload and stores what it recognises.
//
// |offset| / |size| describe the payload, which is read completely into a
// temporary buffer. The atom that is being handled is identified by mPath[4]
// (and by mPath[5] for free-form '----' atoms). The first four bytes of the
// payload are treated as flags; string and binary values start at byte 8.
//
//   - text atoms (album, artist, ...) and cover art go to mFileMetaData, but
//     only if that key has not been set before
//   - 'cpil', 'trkn' and 'disk' are converted to strings
//   - '----' collects mean/name/data triples; a complete
//     "com.apple.iTunes" / "iTunSMPB" triple sets the encoder delay and
//     padding on mLastTrack
//
// Returns OK for an empty payload or after successful processing,
// ERROR_MALFORMED for an unusable size, allocation failure, a too short
// free-form 'data' atom or a missing track, and ERROR_IO on a short read.
status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) {
    if (size == 0) {
        return OK;
    }
    if (size < 4 || size == SIZE_MAX) {
        return ERROR_MALFORMED;
    }

    // One extra byte so that string payloads can be NUL-terminated in place.
    // The unique_ptr releases the buffer on every return path.
    std::unique_ptr<uint8_t[]> storage(new (std::nothrow) uint8_t[size + 1]);
    if (storage == nullptr) {
        return ERROR_MALFORMED;
    }
    uint8_t *const buffer = storage.get();

    if (mDataSource->readAt(offset, buffer, size) != (ssize_t)size) {
        return ERROR_IO;
    }

    uint32_t flags = U32_AT(buffer);
    uint32_t metadataKey = 0;

    char chunk[5];
    MakeFourCCString(mPath[4], chunk);
    ALOGV("meta: %s @ %lld", chunk, (long long)offset);

    switch ((int32_t)mPath[4]) {
        case FOURCC(0xa9, 'a', 'l', 'b'):
        {
            metadataKey = kKeyAlbum;
            break;
        }
        case FOURCC(0xa9, 'A', 'R', 'T'):
        {
            metadataKey = kKeyArtist;
            break;
        }
        case FOURCC('a', 'A', 'R', 'T'):
        {
            metadataKey = kKeyAlbumArtist;
            break;
        }
        case FOURCC(0xa9, 'd', 'a', 'y'):
        {
            metadataKey = kKeyYear;
            break;
        }
        case FOURCC(0xa9, 'n', 'a', 'm'):
        {
            metadataKey = kKeyTitle;
            break;
        }
        case FOURCC(0xa9, 'w', 'r', 't'):
        {
            metadataKey = kKeyWriter;
            break;
        }
        case FOURCC('c', 'o', 'v', 'r'):
        {
            metadataKey = kKeyAlbumArt;
            break;
        }
        case FOURCC('g', 'n', 'r', 'e'):
        {
            metadataKey = kKeyGenre;
            break;
        }
        case FOURCC(0xa9, 'g', 'e', 'n'):
        {
            metadataKey = kKeyGenre;
            break;
        }
        case FOURCC('c', 'p', 'i', 'l'):
        {
            // The compilation flag is the last byte of a 9-byte payload.
            if (size == 9 && flags == 21) {
                char tmp[16];
                snprintf(tmp, sizeof(tmp), "%d", (int)buffer[size - 1]);
                mFileMetaData.setCString(kKeyCompilation, tmp);
            }
            break;
        }
        case FOURCC('t', 'r', 'k', 'n'):
        {
            if (size == 16 && flags == 0) {
                char tmp[16];
                // Big-endian 16-bit track number and track total.
                const uint16_t track = U16_AT(&buffer[10]);
                const uint16_t totalTracks = U16_AT(&buffer[12]);
                snprintf(tmp, sizeof(tmp), "%d/%d", track, totalTracks);
                mFileMetaData.setCString(kKeyCDTrackNumber, tmp);
            }
            break;
        }
        case FOURCC('d', 'i', 's', 'k'):
        {
            if ((size == 14 || size == 16) && flags == 0) {
                char tmp[16];
                // Big-endian 16-bit disc number and disc total.
                const uint16_t disc = U16_AT(&buffer[10]);
                const uint16_t totalDiscs = U16_AT(&buffer[12]);
                snprintf(tmp, sizeof(tmp), "%d/%d", disc, totalDiscs);
                mFileMetaData.setCString(kKeyDiscNumber, tmp);
            }
            break;
        }
        case FOURCC('-', '-', '-', '-'):
        {
            buffer[size] = '\0';
            switch (mPath[5]) {
                case FOURCC('m', 'e', 'a', 'n'):
                    mLastCommentMean.setTo((const char *)buffer + 4);
                    break;
                case FOURCC('n', 'a', 'm', 'e'):
                    mLastCommentName.setTo((const char *)buffer + 4);
                    break;
                case FOURCC('d', 'a', 't', 'a'):
                    if (size < 8) {
                        ALOGE("b/24346430");
                        return ERROR_MALFORMED;
                    }
                    mLastCommentData.setTo((const char *)buffer + 8);
                    break;
            }

            // Once we have a set of mean/name/data info, go ahead and process
            // it to see if it is something we are interested in. Whether or
            // not we are interested in the specific tag, make sure to clear
            // out the set so we can be ready to process another tuple should
            // one show up later in the file.
            if ((mLastCommentMean.length() != 0) &&
                    (mLastCommentName.length() != 0) &&
                    (mLastCommentData.length() != 0)) {

                if (mLastCommentMean == "com.apple.iTunes"
                        && mLastCommentName == "iTunSMPB") {
                    // %x stores into unsigned int; the second and third hex
                    // fields are the encoder delay and padding.
                    unsigned int delay, padding;
                    if (sscanf(mLastCommentData,
                               " %*x %x %x %*x", &delay, &padding) == 2) {
                        if (mLastTrack == NULL) {
                            return ERROR_MALFORMED;
                        }

                        mLastTrack->meta.setInt32(kKeyEncoderDelay, (int32_t)delay);
                        mLastTrack->meta.setInt32(kKeyEncoderPadding, (int32_t)padding);
                    }
                }

                mLastCommentMean.clear();
                mLastCommentName.clear();
                mLastCommentData.clear();
            }
            break;
        }

        default:
            break;
    }

    // Keys selected above are stored only once; later duplicates are ignored.
    if (size >= 8 && metadataKey && !mFileMetaData.hasData(metadataKey)) {
        if (metadataKey == kKeyAlbumArt) {
            mFileMetaData.setData(
                    kKeyAlbumArt, MetaData::TYPE_NONE,
                    buffer + 8, size - 8);
        } else if (metadataKey == kKeyGenre) {
            if (flags == 0) {
                // uint8_t genre code, iTunes genre codes are
                // the standard id3 codes, except they start
                // at 1 instead of 0 (e.g. Pop is 14, not 13)
                // We use standard id3 numbering, so subtract 1.
                int genrecode = (int)buffer[size - 1];
                genrecode--;
                if (genrecode < 0) {
                    genrecode = 255; // reserved for 'unknown genre'
                }
                char genre[10];
                snprintf(genre, sizeof(genre), "%d", genrecode);

                mFileMetaData.setCString(metadataKey, genre);
            } else if (flags == 1) {
                // custom genre string
                buffer[size] = '\0';

                mFileMetaData.setCString(
                        metadataKey, (const char *)buffer + 8);
            }
        } else {
            buffer[size] = '\0';

            mFileMetaData.setCString(
                    metadataKey, (const char *)buffer + 8);
        }
    }

    return OK;
}
// Parses a colour information payload of |size| bytes at |offset| and stores
// the resulting colour aspects on mLastTrack.
//
// The payload starts with a fourcc colour type. 'nclx' (at least 11 bytes)
// and 'nclc' (at least 10 bytes) carry 16-bit primaries, transfer and matrix
// codes at bytes 4, 6 and 8; 'nclx' additionally carries a full-range flag in
// the top bit of byte 10. Other colour types are ignored. Only the first
// colour specification seen for a track is kept.
//
// Returns ERROR_MALFORMED for an unusable size, missing track or allocation
// failure, ERROR_IO on a short read, OK otherwise.
status_t MPEG4Extractor::parseColorInfo(off64_t offset, size_t size) {
    if (size < 4 || size == SIZE_MAX || mLastTrack == NULL) {
        return ERROR_MALFORMED;
    }

    // Owned by a unique_ptr so that every return path releases it.
    std::unique_ptr<uint8_t[]> storage(new (std::nothrow) uint8_t[size + 1]);
    if (storage == nullptr) {
        return ERROR_MALFORMED;
    }
    const uint8_t *const buffer = storage.get();

    if (mDataSource->readAt(offset, storage.get(), size) != (ssize_t)size) {
        return ERROR_IO;
    }

    const int32_t type = U32_AT(&buffer[0]);
    const bool isNclx = (type == FOURCC('n', 'c', 'l', 'x'));
    const bool isNclc = (type == FOURCC('n', 'c', 'l', 'c'));

    if ((isNclx && size >= 11) || (isNclc && size >= 10)) {
        const int32_t primaries = U16_AT(&buffer[4]);
        const int32_t transfer = U16_AT(&buffer[6]);
        const int32_t coeffs = U16_AT(&buffer[8]);
        // Only 'nclx' has the range byte.
        const bool fullRange = isNclx && (buffer[10] & 128);

        ColorAspects aspects;
        ColorUtils::convertIsoColorAspectsToCodecAspects(
                primaries, transfer, coeffs, fullRange, aspects);

        // only store the first color specification
        if (!mLastTrack->meta.hasData(kKeyColorPrimaries)) {
            mLastTrack->meta.setInt32(kKeyColorPrimaries, aspects.mPrimaries);
            mLastTrack->meta.setInt32(kKeyTransferFunction, aspects.mTransfer);
            mLastTrack->meta.setInt32(kKeyColorMatrix, aspects.mMatrixCoeffs);
            mLastTrack->meta.setInt32(kKeyColorRange, aspects.mRange);
        }
    }

    return OK;
}
// Parses one 3GPP metadata payload and stores what it recognises in
// mFileMetaData.
//
// |offset| / |size| describe the payload, which is read completely into a
// temporary buffer; mPath[depth] identifies the atom being handled.
//   - 'titl', 'perf', 'auth', 'gnre', 'albm': text starting at byte 6, either
//     UTF-8 or UTF-16 with a byte order mark
//   - 'albm': a non-zero last byte is additionally stored as the track number
//   - 'yrrc': 16-bit year at byte 4, stored when below 10000
//
// Returns ERROR_MALFORMED for an unusable size, allocation failure or a
// payload too short for the atom, ERROR_IO on a short read, OK otherwise.
status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) {
    if (size < 4 || size == SIZE_MAX) {
        return ERROR_MALFORMED;
    }

    // One extra byte so that UTF-8 text can be NUL-terminated in place.
    // The unique_ptr releases the buffer on every return path.
    std::unique_ptr<uint8_t[]> storage(new (std::nothrow) uint8_t[size + 1]);
    if (storage == nullptr) {
        return ERROR_MALFORMED;
    }
    uint8_t *const buffer = storage.get();

    if (mDataSource->readAt(offset, buffer, size) != (ssize_t)size) {
        return ERROR_IO;
    }

    uint32_t metadataKey = 0;
    switch (mPath[depth]) {
        case FOURCC('t', 'i', 't', 'l'):
        {
            metadataKey = kKeyTitle;
            break;
        }
        case FOURCC('p', 'e', 'r', 'f'):
        {
            metadataKey = kKeyArtist;
            break;
        }
        case FOURCC('a', 'u', 't', 'h'):
        {
            metadataKey = kKeyWriter;
            break;
        }
        case FOURCC('g', 'n', 'r', 'e'):
        {
            metadataKey = kKeyGenre;
            break;
        }
        case FOURCC('a', 'l', 'b', 'm'):
        {
            // A non-zero trailing byte is taken as the track number.
            if (buffer[size - 1] != '\0') {
                char tmp[4];
                snprintf(tmp, sizeof(tmp), "%u", (unsigned)buffer[size - 1]);

                mFileMetaData.setCString(kKeyCDTrackNumber, tmp);
            }

            metadataKey = kKeyAlbum;
            break;
        }
        case FOURCC('y', 'r', 'r', 'c'):
        {
            if (size < 6) {
                ALOGE("b/62133227");
                android_errorWriteLog(0x534e4554, "62133227");
                return ERROR_MALFORMED;
            }
            char tmp[5];
            uint16_t year = U16_AT(&buffer[4]);

            // At most four digits fit into tmp.
            if (year < 10000) {
                snprintf(tmp, sizeof(tmp), "%u", (unsigned)year);

                mFileMetaData.setCString(kKeyYear, tmp);
            }
            break;
        }

        default:
            break;
    }

    if (metadataKey > 0) {
        bool isUTF8 = true; // Common case
        char16_t *framedata = NULL;
        int len16 = 0; // Number of UTF-16 characters

        // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00
        if (size < 6) {
            return ERROR_MALFORMED;
        }

        if (size - 6 >= 4) {
            len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator
            framedata = (char16_t *)(buffer + 6);
            if (0xfffe == *framedata) {
                // endianness marker (BOM) doesn't match host endianness
                for (int i = 0; i < len16; i++) {
                    framedata[i] = bswap_16(framedata[i]);
                }
                // BOM is now swapped to 0xfeff, we will execute next block too
            }

            if (0xfeff == *framedata) {
                // Remove the BOM
                framedata++;
                len16--;
                isUTF8 = false;
            }
            // else normal non-zero-length UTF-8 string
            // we can't handle UTF-16 without BOM as there is no other
            // indication of encoding.
        }

        if (isUTF8) {
            buffer[size] = 0;
            mFileMetaData.setCString(metadataKey, (const char *)buffer + 6);
        } else {
            // Convert from UTF-16 string to UTF-8 string.
            String8 tmpUTF8str(framedata, len16);
            mFileMetaData.setCString(metadataKey, tmpUTF8str.string());
        }
    }

    return OK;
}
void MPEG4Extractor::parseID3v2MetaData(off64_t offset) {
ID3 id3(mDataSource, true /* ignorev1 */, offset);
if (id3.isValid()) {
struct Map {
int key;
const char *tag1;
const char *tag2;
};
static const Map kMap[] = {
{ kKeyAlbum, "TALB", "TAL" },
{ kKeyArtist, "TPE1", "TP1" },
{ kKeyAlbumArtist, "TPE2", "TP2" },
{ kKeyComposer, "TCOM", "TCM" },
{ kKeyGenre, "TCON", "TCO" },
{ kKeyTitle, "TIT2", "TT2" },
{ kKeyYear, "TYE", "TYER" },
{ kKeyAuthor, "TXT", "TEXT" },
{ kKeyCDTrackNumber, "TRK", "TRCK" },
{ kKeyDiscNumber, "TPA", "TPOS" },
{ kKeyCompilation, "TCP", "TCMP" },
};
static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]);
for (size_t i = 0; i < kNumMapEntries; ++i) {
if (!mFileMetaData.hasData(kMap[i].key)) {
ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1);
if (it->done()) {
delete it;
it = new ID3::Iterator(id3, kMap[i].tag2);
}
if (it->done()) {
delete it;
continue;
}
String8 s;
it->getString(&s);
delete it;
mFileMetaData.setCString(kMap[i].key, s);
}
}
size_t dataSize;
String8 mime;
const void *data = id3.getAlbumArt(&dataSize, &mime);
if (data) {
mFileMetaData.setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize);
mFileMetaData.setCString(kKeyAlbumArtMIME, mime.string());
}
}
}
// Creates the media source for the track at position |index|.
//
// Returns NULL when the metadata cannot be read, when there is no such track,
// when the track has no track id or MIME type, when an AVC/HEVC/HEIC track has
// no usable codec configuration record, or when initialising the new source
// fails. Otherwise a newly allocated MPEG4Source is returned.
MediaTrack *MPEG4Extractor::getTrack(size_t index) {
    if (readMetaData() != OK) {
        return NULL;
    }

    // Walk the linked list of tracks up to the requested position.
    Track *track = mFirstTrack;
    for (size_t remaining = index; remaining > 0; --remaining) {
        if (track == NULL) {
            return NULL;
        }
        track = track->next;
    }
    if (track == NULL) {
        return NULL;
    }

    int32_t trackId;
    if (!track->meta.findInt32(kKeyTrackID, &trackId)) {
        ALOGE("b/21657957");
        return NULL;
    }

    // Look for the 'trex' entry that carries the same track id, if any.
    Trex *trex = NULL;
    for (size_t i = 0; i < mTrex.size() && trex == NULL; i++) {
        Trex *candidate = &mTrex.editItemAt(i);
        if (candidate->track_ID == (uint32_t) trackId) {
            trex = candidate;
        }
    }

    ALOGV("getTrack called, pssh: %zu", mPssh.size());

    const char *mime;
    if (!track->meta.findCString(kKeyMIMEType, &mime)) {
        return NULL;
    }

    // True if |key| holds a configuration record of at least |minSize| bytes
    // whose first byte is 1 (configurationVersion == 1).
    auto hasConfigRecord = [track](uint32_t key, size_t minSize) -> bool {
        uint32_t type;
        const void *data;
        size_t size;
        if (!track->meta.findData(key, &type, &data, &size)) {
            return false;
        }
        const uint8_t *ptr = (const uint8_t *)data;
        return size >= minSize && ptr[0] == 1;
    };

    const bool isAvc = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC);
    const bool isHevc = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC);
    const bool isHeic = !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC);

    sp<ItemTable> itemTable;
    if (isAvc) {
        if (!hasConfigRecord(kKeyAVCC, 7)) {
            return NULL;
        }
    } else if (isHevc || isHeic) {
        if (!hasConfigRecord(kKeyHVCC, 22)) {
            return NULL;
        }
        // Only HEIC tracks are handed the item table.
        if (isHeic) {
            itemTable = mItemTable;
        }
    }

    MPEG4Source *source = new MPEG4Source(
            track->meta, mDataSource, track->timescale, track->sampleTable,
            mSidxEntries, trex, mMoofOffset, itemTable);
    if (source->init() == OK) {
        return source;
    }
    delete source;
    return NULL;
}
// static
// Checks that |track| carries everything needed to be played.
//
// AVC tracks need kKeyAVCC data of type kTypeAVCC, HEVC tracks kKeyHVCC data
// of type kTypeHVCC, and MPEG-4 video, MPEG-2 video and AAC tracks kKeyESDS
// data of type kTypeESDS. Every track needs a valid sample table and a
// non-zero timescale. The track must already have a MIME type (CHECKed).
//
// Returns OK if all requirements hold and ERROR_MALFORMED otherwise.
status_t MPEG4Extractor::verifyTrack(Track *track) {
    const char *mime;
    CHECK(track->meta.findCString(kKeyMIMEType, &mime));

    // Work out which codec configuration blob, if any, this MIME type needs.
    bool needsConfig = true;
    uint32_t configKey = 0;
    uint32_t configType = 0;
    if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
        configKey = kKeyAVCC;
        configType = kTypeAVCC;
    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
        configKey = kKeyHVCC;
        configType = kTypeHVCC;
    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4)
            || !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)
            || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
        configKey = kKeyESDS;
        configType = kTypeESDS;
    } else {
        needsConfig = false;
    }

    if (needsConfig) {
        uint32_t type;
        const void *data;
        size_t size;
        const bool found = track->meta.findData(configKey, &type, &data, &size);
        if (!found || type != configType) {
            return ERROR_MALFORMED;
        }
    }

    const bool haveSampleTable =
            track->sampleTable != NULL && track->sampleTable->isValid();
    if (!haveSampleTable) {
        // Make sure we have all the metadata we need.
        ALOGE("stbl atom missing/invalid.");
        return ERROR_MALFORMED;
    }

    if (track->timescale == 0) {
        ALOGE("timescale invalid.");
        return ERROR_MALFORMED;
    }

    return OK;
}
// MPEG-4 audio object type (AOT) codes.
//
// Only the enumerators used by updateAudioTrackInfoFromESDS_MPEG4Audio are
// active; the remaining codes are kept as comments for reference.
enum AUDIO_OBJECT_TYPE {
    // AOT_NONE             = -1,
    // AOT_NULL_OBJECT      = 0,
    // AOT_AAC_MAIN         = 1,   Main profile
    AOT_AAC_LC           = 2,   // Low Complexity object
    // AOT_AAC_SSR          = 3,
    // AOT_AAC_LTP          = 4,
    AOT_SBR              = 5,
    // AOT_AAC_SCAL         = 6,
    // AOT_TWIN_VQ          = 7,
    // AOT_CELP             = 8,
    // AOT_HVXC             = 9,
    // AOT_RSVD_10          = 10,  (reserved)
    // AOT_RSVD_11          = 11,  (reserved)
    // AOT_TTSI             = 12,  TTSI Object
    // AOT_MAIN_SYNTH       = 13,  Main Synthetic object
    // AOT_WAV_TAB_SYNTH    = 14,  Wavetable Synthesis object
    // AOT_GEN_MIDI         = 15,  General MIDI object
    // AOT_ALG_SYNTH_AUD_FX = 16,  Algorithmic Synthesis and Audio FX object
    AOT_ER_AAC_LC        = 17,  // Error Resilient(ER) AAC Low Complexity
    // AOT_RSVD_18          = 18,  (reserved)
    // AOT_ER_AAC_LTP       = 19,  Error Resilient(ER) AAC LTP object
    AOT_ER_AAC_SCAL      = 20,  // Error Resilient(ER) AAC Scalable object
    // AOT_ER_TWIN_VQ       = 21,  Error Resilient(ER) TwinVQ object
    AOT_ER_BSAC          = 22,  // Error Resilient(ER) BSAC object
    AOT_ER_AAC_LD        = 23,  // Error Resilient(ER) AAC LowDelay object
    // AOT_ER_CELP          = 24,  Error Resilient(ER) CELP object
    // AOT_ER_HVXC          = 25,  Error Resilient(ER) HVXC object
    // AOT_ER_HILN          = 26,  Error Resilient(ER) HILN object
    // AOT_ER_PARA          = 27,  Error Resilient(ER) Parametric object
    // AOT_RSVD_28          = 28,  might become SSC
    AOT_PS               = 29,  // PS, Parametric Stereo (includes SBR)
    // AOT_MPEGS            = 30,  MPEG Surround
    AOT_ESCAPE           = 31,  // Signal AOT uses more than 5 bits
    // AOT_MP3ONMP4_L1      = 32,  MPEG-Layer1 in mp4
    // AOT_MP3ONMP4_L2      = 33,  MPEG-Layer2 in mp4
    // AOT_MP3ONMP4_L3      = 34,  MPEG-Layer3 in mp4
    // AOT_RSVD_35          = 35,  might become DST
    // AOT_RSVD_36          = 36,  might become ALS
    // AOT_AAC_SLS          = 37,  AAC + SLS
    // AOT_SLS              = 38,  SLS
    // AOT_ER_AAC_ELD       = 39,  AAC Enhanced Low Delay
    // AOT_USAC             = 42,  USAC
    // AOT_SAOC             = 43,  SAOC
    // AOT_LD_MPEGS         = 44,  Low Delay MPEG Surround
    // AOT_RSVD50           = 50,  Interim AOT for Rsvd50
};
// Updates the audio parameters of mLastTrack from an 'esds' payload.
//
// |esds_data| / |esds_size| describe the raw ES descriptor. The object type
// indication is examined first: 0xe1 switches the track MIME type to QCELP and
// returns, 0x6b (MP3) is rejected. Non-zero max/average bitrates below
// INT32_MAX are stored on the track. The codec specific data is then read with
// an ABitReader to obtain the audio object type, the sampling rate and the
// channel count, which are stored as kKeyAACAOT, kKeySampleRate and
// kKeyChannelCount.
//
// Returns OK on success (also when there is no codec specific data at all),
// ERROR_MALFORMED when the descriptor is truncated or inconsistent or when no
// track is being parsed, and ERROR_UNSUPPORTED for MP3, for a channel
// configuration that is not handled, or for a resulting channel count of zero.
status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
const void *esds_data, size_t esds_size) {
ESDS esds(esds_data, esds_size);
uint8_t objectTypeIndication;
if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) {
return ERROR_MALFORMED;
}
if (objectTypeIndication == 0xe1) {
// This isn't MPEG4 audio at all, it's QCELP 14k...
if (mLastTrack == NULL)
return ERROR_MALFORMED;
mLastTrack->meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP);
return OK;
}
if (objectTypeIndication == 0x6b) {
// The media subtype is MP3 audio
// Our software MP3 audio decoder may not be able to handle
// packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED
ALOGE("MP3 track in MP4/3GPP file is not supported");
return ERROR_UNSUPPORTED;
}
// Bitrates are optional: they are only recorded when a track exists and the
// value is non-zero and below INT32_MAX.
if (mLastTrack != NULL) {
uint32_t maxBitrate = 0;
uint32_t avgBitrate = 0;
esds.getBitRate(&maxBitrate, &avgBitrate);
if (maxBitrate > 0 && maxBitrate < INT32_MAX) {
mLastTrack->meta.setInt32(kKeyMaxBitRate, (int32_t)maxBitrate);
}
if (avgBitrate > 0 && avgBitrate < INT32_MAX) {
mLastTrack->meta.setInt32(kKeyBitRate, (int32_t)avgBitrate);
}
}
const uint8_t *csd;
size_t csd_size;
if (esds.getCodecSpecificInfo(
(const void **)&csd, &csd_size) != OK) {
return ERROR_MALFORMED;
}
if (kUseHexDump) {
printf("ESD of size %zu\n", csd_size);
hexdump(csd, csd_size);
}
if (csd_size == 0) {
// There's no further information, i.e. no codec specific data
// Let's assume that the information provided in the mpeg4 headers
// is accurate and hope for the best.
return OK;
}
// At least two bytes are required; the reads up to and including the
// frequency index below consume at most 15 bits without a length check.
if (csd_size < 2) {
return ERROR_MALFORMED;
}
// Sampling rates selected by frequency index 0..12. Indices 13 and 14 are
// rejected below and 15 means an explicit 24-bit rate follows.
static uint32_t kSamplingRate[] = {
96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
16000, 12000, 11025, 8000, 7350
};
ABitReader br(csd, csd_size);
uint32_t objectType = br.getBits(5);
if (objectType == 31) { // AAC-ELD => additional 6 bits
objectType = 32 + br.getBits(6);
}
if (mLastTrack == NULL)
return ERROR_MALFORMED;
//keep AOT type
mLastTrack->meta.setInt32(kKeyAACAOT, objectType);
uint32_t freqIndex = br.getBits(4);
int32_t sampleRate = 0;
int32_t numChannels = 0;
if (freqIndex == 15) {
// Explicit 24-bit sampling rate followed by the 4-bit channel configuration.
if (br.numBitsLeft() < 28) return ERROR_MALFORMED;
sampleRate = br.getBits(24);
numChannels = br.getBits(4);
} else {
if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
numChannels = br.getBits(4);
if (freqIndex == 13 || freqIndex == 14) {
return ERROR_MALFORMED;
}
sampleRate = kSamplingRate[freqIndex];
}
if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 table 1.13
if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
uint32_t extFreqIndex = br.getBits(4);
int32_t extSampleRate __unused;
if (extFreqIndex == 15) {
if (csd_size < 8) {
return ERROR_MALFORMED;
}
if (br.numBitsLeft() < 24) return ERROR_MALFORMED;
extSampleRate = br.getBits(24);
} else {
if (extFreqIndex == 13 || extFreqIndex == 14) {
return ERROR_MALFORMED;
}
extSampleRate = kSamplingRate[extFreqIndex];
}
//TODO: save the extension sampling rate value in meta data =>
// mLastTrack->meta.setInt32(kKeyExtSampleRate, extSampleRate);
}
// Translate the channel configuration code into a channel count. Code 0 is
// kept as 0 here and may be replaced further down by the count derived
// from the channel element counts.
switch (numChannels) {
// values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration
case 0:
case 1:// FC
case 2:// FL FR
case 3:// FC, FL FR
case 4:// FC, FL FR, RC
case 5:// FC, FL FR, SL SR
case 6:// FC, FL FR, SL SR, LFE
//numChannels already contains the right value
break;
case 11:// FC, FL FR, SL SR, RC, LFE
numChannels = 7;
break;
case 7: // FC, FCL FCR, FL FR, SL SR, LFE
case 12:// FC, FL FR, SL SR, RL RR, LFE
case 14:// FC, FL FR, SL SR, LFE, FHL FHR
numChannels = 8;
break;
default:
return ERROR_UNSUPPORTED;
}
{
// For SBR/PS another object type follows, read the same way as the first
// one (5 bits, plus 6 more after the escape value).
if (objectType == AOT_SBR || objectType == AOT_PS) {
if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
objectType = br.getBits(5);
if (objectType == AOT_ESCAPE) {
if (br.numBitsLeft() < 6) return ERROR_MALFORMED;
objectType = 32 + br.getBits(6);
}
}
if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC ||
objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL ||
objectType == AOT_ER_BSAC) {
if (br.numBitsLeft() < 2) return ERROR_MALFORMED;
const int32_t frameLengthFlag __unused = br.getBits(1);
const int32_t dependsOnCoreCoder = br.getBits(1);
if (dependsOnCoreCoder ) {
if (br.numBitsLeft() < 14) return ERROR_MALFORMED;
const int32_t coreCoderDelay __unused = br.getBits(14);
}
// The extension flag may be missing from the data; in that case a value
// is assumed based on the object type.
int32_t extensionFlag = -1;
if (br.numBitsLeft() > 0) {
extensionFlag = br.getBits(1);
} else {
switch (objectType) {
// 14496-3 4.5.1.1 extensionFlag
case AOT_AAC_LC:
extensionFlag = 0;
break;
case AOT_ER_AAC_LC:
case AOT_ER_AAC_SCAL:
case AOT_ER_BSAC:
case AOT_ER_AAC_LD:
extensionFlag = 1;
break;
default:
return ERROR_MALFORMED;
break;
}
ALOGW("csd missing extension flag; assuming %d for object type %u.",
extensionFlag, objectType);
}
// Channel configuration 0: derive the channel count from the element
// counts that follow in the bit stream.
if (numChannels == 0) {
int32_t channelsEffectiveNum = 0;
int32_t channelsNum = 0;
// The ten fixed-width fields read next add up to exactly 32 bits.
if (br.numBitsLeft() < 32) {
return ERROR_MALFORMED;
}
const int32_t ElementInstanceTag __unused = br.getBits(4);
const int32_t Profile __unused = br.getBits(2);
const int32_t SamplingFrequencyIndex __unused = br.getBits(4);
const int32_t NumFrontChannelElements = br.getBits(4);
const int32_t NumSideChannelElements = br.getBits(4);
const int32_t NumBackChannelElements = br.getBits(4);
const int32_t NumLfeChannelElements = br.getBits(2);
const int32_t NumAssocDataElements __unused = br.getBits(3);
const int32_t NumValidCcElements __unused = br.getBits(4);
const int32_t MonoMixdownPresent = br.getBits(1);
if (MonoMixdownPresent != 0) {
if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
const int32_t MonoMixdownElementNumber __unused = br.getBits(4);
}
if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
const int32_t StereoMixdownPresent = br.getBits(1);
if (StereoMixdownPresent != 0) {
if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
const int32_t StereoMixdownElementNumber __unused = br.getBits(4);
}
if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
const int32_t MatrixMixdownIndexPresent = br.getBits(1);
if (MatrixMixdownIndexPresent != 0) {
if (br.numBitsLeft() < 3) return ERROR_MALFORMED;
const int32_t MatrixMixdownIndex __unused = br.getBits(2);
const int32_t PseudoSurroundEnable __unused = br.getBits(1);
}
// Each front/side/back element contributes two channels when its
// "is CPE" bit is set and one channel otherwise.
int i;
for (i=0; i < NumFrontChannelElements; i++) {
if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
const int32_t FrontElementIsCpe = br.getBits(1);
const int32_t FrontElementTagSelect __unused = br.getBits(4);
channelsNum += FrontElementIsCpe ? 2 : 1;
}
for (i=0; i < NumSideChannelElements; i++) {
if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
const int32_t SideElementIsCpe = br.getBits(1);
const int32_t SideElementTagSelect __unused = br.getBits(4);
channelsNum += SideElementIsCpe ? 2 : 1;
}
for (i=0; i < NumBackChannelElements; i++) {
if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
const int32_t BackElementIsCpe = br.getBits(1);
const int32_t BackElementTagSelect __unused = br.getBits(4);
channelsNum += BackElementIsCpe ? 2 : 1;
}
// channelsEffectiveNum is the count before LFE elements are added; it is
// only logged.
channelsEffectiveNum = channelsNum;
for (i=0; i < NumLfeChannelElements; i++) {
if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
const int32_t LfeElementTagSelect __unused = br.getBits(4);
channelsNum += 1;
}
ALOGV("mpeg4 audio channelsNum = %d", channelsNum);
ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum);
numChannels = channelsNum;
}
}
}
if (numChannels == 0) {
return ERROR_UNSUPPORTED;
}
if (mLastTrack == NULL)
return ERROR_MALFORMED;
// The track must already carry a sample rate and a channel count (both are
// CHECKed); they are overwritten with the values found in the descriptor.
int32_t prevSampleRate;
CHECK(mLastTrack->meta.findInt32(kKeySampleRate, &prevSampleRate));
if (prevSampleRate != sampleRate) {
ALOGV("mpeg4 audio sample rate different from previous setting. "
"was: %d, now: %d", prevSampleRate, sampleRate);
}
mLastTrack->meta.setInt32(kKeySampleRate, sampleRate);
int32_t prevChannelCount;
CHECK(mLastTrack->meta.findInt32(kKeyChannelCount, &prevChannelCount));
if (prevChannelCount != numChannels) {
ALOGV("mpeg4 audio channel count different from previous setting. "
"was: %d, now: %d", prevChannelCount, numChannels);
}
mLastTrack->meta.setInt32(kKeyChannelCount, numChannels);
return OK;
}
////////////////////////////////////////////////////////////////////////////////
// Creates a source for a single track.
//
// |format| is the track metadata and must contain a MIME type and a track id
// (both CHECKed). AVC tracks must carry kKeyAVCC data of at least 7 bytes and
// HEVC/HEIC tracks kKeyHVCC data of at least 22 bytes, each with
// configurationVersion 1 (all CHECKed); the NAL length size is taken from
// that record. Optional crypto parameters are copied from the metadata when
// present. A non-NULL |itemTable| marks the source as HEIF.
MPEG4Source::MPEG4Source(
        MetaDataBase &format,
        DataSourceBase *dataSource,
        int32_t timeScale,
        const sp<SampleTable> &sampleTable,
        Vector<SidxEntry> &sidx,
        const Trex *trex,
        off64_t firstMoofOffset,
        const sp<ItemTable> &itemTable)
    : mFormat(format),
      mDataSource(dataSource),
      mTimescale(timeScale),
      mSampleTable(sampleTable),
      mCurrentSampleIndex(0),
      mCurrentFragmentIndex(0),
      mSegments(sidx),
      mTrex(trex),
      mFirstMoofOffset(firstMoofOffset),
      mCurrentMoofOffset(firstMoofOffset),
      mNextMoofOffset(-1),
      mCurrentTime(0),
      mDefaultEncryptedByteBlock(0),
      mDefaultSkipByteBlock(0),
      mCurrentSampleInfoAllocSize(0),
      mCurrentSampleInfoSizes(NULL),
      mCurrentSampleInfoOffsetsAllocSize(0),
      mCurrentSampleInfoOffsets(NULL),
      mIsAVC(false),
      mIsHEVC(false),
      mNALLengthSize(0),
      mStarted(false),
      mGroup(NULL),
      mBuffer(NULL),
      mWantsNALFragments(false),
      mSrcBuffer(NULL),
      mIsHeif(itemTable != NULL),
      mItemTable(itemTable) {

    memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo));

    // findInt32() is given mCryptoMode as its output location and the member
    // is not set anywhere else in this constructor, so give it a defined
    // value first, exactly as is done for mDefaultIVSize below.
    // NOTE(review): 0 is assumed to mean "no encryption" - confirm.
    mCryptoMode = 0;
    mFormat.findInt32(kKeyCryptoMode, &mCryptoMode);
    mDefaultIVSize = 0;
    mFormat.findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize);

    uint32_t keytype;
    const void *key;
    size_t keysize;
    if (mFormat.findData(kKeyCryptoKey, &keytype, &key, &keysize)) {
        // The key is stored zero padded in a 16-byte array.
        CHECK(keysize <= 16);
        memset(mCryptoKey, 0, 16);
        memcpy(mCryptoKey, key, keysize);
    }

    mFormat.findInt32(kKeyEncryptedByteBlock, &mDefaultEncryptedByteBlock);
    mFormat.findInt32(kKeySkipByteBlock, &mDefaultSkipByteBlock);

    const char *mime;
    bool success = mFormat.findCString(kKeyMIMEType, &mime);
    CHECK(success);

    mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC);
    mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC) ||
              !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC);

    if (mIsAVC) {
        uint32_t type;
        const void *data;
        size_t size;
        CHECK(format.findData(kKeyAVCC, &type, &data, &size));

        const uint8_t *ptr = (const uint8_t *)data;

        CHECK(size >= 7);
        CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1

        // The number of bytes used to encode the length of a NAL unit.
        mNALLengthSize = 1 + (ptr[4] & 3);
    } else if (mIsHEVC) {
        uint32_t type;
        const void *data;
        size_t size;
        CHECK(format.findData(kKeyHVCC, &type, &data, &size));

        const uint8_t *ptr = (const uint8_t *)data;

        CHECK(size >= 22);
        CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1

        // The NAL length size is taken from the low two bits of byte 21.
        mNALLengthSize = 1 + (ptr[14 + 7] & 3);
    }

    CHECK(format.findInt32(kKeyTrackID, &mTrackId));
}
// Parses the chunk at the first 'moof' offset when that offset is non-zero.
// Returns the result of parseChunk(), or OK when there is nothing to parse.
status_t MPEG4Source::init() {
    if (mFirstMoofOffset == 0) {
        return OK;
    }

    // parseChunk() advances the offset it is given, so work on a copy.
    off64_t moofOffset = mFirstMoofOffset;
    return parseChunk(&moofOffset);
}
// Stops the source if it is still running and releases the sample info arrays.
MPEG4Source::~MPEG4Source() {
    if (mStarted) {
        stop();
    }

    free(mCurrentSampleInfoOffsets);
    free(mCurrentSampleInfoSizes);
}
// Prepares the source for reading: allocates the buffer group and the scratch
// source buffer, sized from kKeyMaxInputSize in the track format.
//
// |params| may be NULL; a non-zero kKeyWantsNALFragments entry switches on NAL
// fragment output. The source must not already be started and the format must
// contain kKeyMaxInputSize (both CHECKed).
//
// Returns ERROR_MALFORMED when the maximum input size is zero or larger than
// 64 MiB, or when the scratch buffer cannot be allocated; OK otherwise.
status_t MPEG4Source::start(MetaDataBase *params) {
    Mutex::Autolock autoLock(mLock);

    CHECK(!mStarted);

    int32_t wantsFragments;
    mWantsNALFragments = params != NULL
            && params->findInt32(kKeyWantsNALFragments, &wantsFragments)
            && wantsFragments != 0;

    int32_t declaredMaxSize;
    CHECK(mFormat.findInt32(kKeyMaxInputSize, &declaredMaxSize));
    const size_t max_size = declaredMaxSize;

    // A somewhat arbitrary limit that should be sufficient for 8k video frames
    // If you see the message below for a valid input stream: increase the limit
    const size_t kMaxBufferSize = 64 * 1024 * 1024;
    if (max_size > kMaxBufferSize) {
        ALOGE("bogus max input size: %zu > %zu", max_size, kMaxBufferSize);
        return ERROR_MALFORMED;
    }
    if (max_size == 0) {
        ALOGE("zero max input size");
        return ERROR_MALFORMED;
    }

    // Allow up to kMaxBuffers, but not if the total exceeds kMaxBufferSize.
    const size_t kInitialBuffers = 2;
    const size_t kMaxBuffers = 8;
    const size_t buffersWithinBudget = kMaxBufferSize / max_size;
    const size_t realMaxBuffers = min(buffersWithinBudget, kMaxBuffers);
    mGroup = new MediaBufferGroup(kInitialBuffers, max_size, realMaxBuffers);

    mSrcBuffer = new (std::nothrow) uint8_t[max_size];
    if (mSrcBuffer != NULL) {
        mStarted = true;
        return OK;
    }

    // file probably specified a bad max size
    delete mGroup;
    mGroup = NULL;
    return ERROR_MALFORMED;
}
// Stops the source: releases the pending buffer, frees the scratch buffer and
// the buffer group, and resets the current sample index to zero. The source
// must be started (CHECKed). Always returns OK.
status_t MPEG4Source::stop() {
    Mutex::Autolock autoLock(mLock);

    CHECK(mStarted);

    if (mBuffer != NULL) {
        mBuffer->release();
        mBuffer = NULL;
    }

    delete[] mSrcBuffer;
    mSrcBuffer = NULL;

    delete mGroup;
    mGroup = NULL;

    mCurrentSampleIndex = 0;
    mStarted = false;

    return OK;
}
// Parses the box ("chunk") that starts at |*offset| and advances |*offset|
// past it.
//
// 'moof' and 'traf' boxes are containers: their children are parsed
// recursively. After a 'moof' has been consumed, the boxes that follow are
// scanned (headers only) until the next 'moof' is found, whose position is
// stored in mNextMoofOffset. 'tfhd', 'trun', 'saiz', 'saio' and 'senc' are
// forwarded to their dedicated parsers; everything else is skipped.
//
// Returns ERROR_IO when a header cannot be read, ERROR_MALFORMED when a box
// size is smaller than its own header or would extend past the largest
// representable file offset, or the error of a nested parser; OK otherwise.
status_t MPEG4Source::parseChunk(off64_t *offset) {
    uint32_t hdr[2];
    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
        return ERROR_IO;
    }
    uint64_t chunk_size = ntohl(hdr[0]);
    uint32_t chunk_type = ntohl(hdr[1]);

    // The header is 8 bytes, or 16 when a 64-bit "largesize" follows it.
    off64_t header_size = 8;
    if (chunk_size == 1) {
        if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
            return ERROR_IO;
        }
        chunk_size = ntoh64(chunk_size);
        header_size = 16;
        if (chunk_size < 16) {
            // The smallest valid chunk is 16 bytes long in this case.
            return ERROR_MALFORMED;
        }
    } else if (chunk_size < 8) {
        // The smallest valid chunk is 8 bytes long.
        return ERROR_MALFORMED;
    }

    // chunk_size comes straight from the file. Reject values for which
    // *offset + chunk_size cannot be represented in off64_t, so that none of
    // the signed offset arithmetic below can overflow.
    if (*offset < 0 || chunk_size > (uint64_t)(INT64_MAX - *offset)) {
        return ERROR_MALFORMED;
    }

    const off64_t data_offset = *offset + header_size;

    char chunk[5];
    MakeFourCCString(chunk_type, chunk);
    ALOGV("MPEG4Source chunk %s @ %#llx", chunk, (long long)*offset);

    // Payload size: total box size minus the header that was just read.
    const off64_t chunk_data_size = (off64_t)chunk_size - header_size;

    switch(chunk_type) {

        case FOURCC('t', 'r', 'a', 'f'):
        case FOURCC('m', 'o', 'o', 'f'): {
            off64_t stop_offset = *offset + chunk_size;
            *offset = data_offset;
            while (*offset < stop_offset) {
                status_t err = parseChunk(offset);
                if (err != OK) {
                    return err;
                }
            }
            if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
                // *offset points to the box following this moof. Find the next moof from there.

                while (true) {
                    if (mDataSource->readAt(*offset, hdr, 8) < 8) {
                        // no more box to the end of file.
                        break;
                    }
                    chunk_size = ntohl(hdr[0]);
                    chunk_type = ntohl(hdr[1]);
                    if (chunk_size == 1) {
                        // ISO/IEC 14496-12:2012, 8.8.4 Movie Fragment Box, moof is a Box
                        // which is defined in 4.2 Object Structure.
                        // When chunk_size==1, 8 bytes follows as "largesize".
                        if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
                            return ERROR_IO;
                        }
                        chunk_size = ntoh64(chunk_size);
                        if (chunk_size < 16) {
                            // The smallest valid chunk is 16 bytes long in this case.
                            return ERROR_MALFORMED;
                        }
                    } else if (chunk_size == 0) {
                        // next box extends to end of file.
                    } else if (chunk_size < 8) {
                        // The smallest valid chunk is 8 bytes long in this case.
                        return ERROR_MALFORMED;
                    }

                    if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
                        mNextMoofOffset = *offset;
                        break;
                    } else if (chunk_size == 0) {
                        break;
                    }
                    if (*offset < 0 || chunk_size > (uint64_t)(INT64_MAX - *offset)) {
                        // This box claims to end beyond any representable
                        // offset, so no further moof can follow it. Stop
                        // scanning instead of overflowing *offset.
                        break;
                    }
                    *offset += chunk_size;
                }
            }
            break;
        }

        case FOURCC('t', 'f', 'h', 'd'): {
            status_t err;
            if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) {
                return err;
            }
            *offset += chunk_size;
            break;
        }

        case FOURCC('t', 'r', 'u', 'n'): {
            status_t err;
            // Only runs that belong to this source's track are parsed.
            if (mLastParsedTrackId == mTrackId) {
                if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) {
                    return err;
                }
            }

            *offset += chunk_size;
            break;
        }

        case FOURCC('s', 'a', 'i', 'z'): {
            status_t err;
            if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) {
                return err;
            }
            *offset += chunk_size;
            break;
        }
        case FOURCC('s', 'a', 'i', 'o'): {
            status_t err;
            if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) {
                return err;
            }
            *offset += chunk_size;
            break;
        }

        case FOURCC('s', 'e', 'n', 'c'): {
            status_t err;
            if ((err = parseSampleEncryption(data_offset)) != OK) {
                return err;
            }
            *offset += chunk_size;
            break;
        }

        case FOURCC('m', 'd', 'a', 't'): {
            // parse DRM info if present
            ALOGV("MPEG4Source::parseChunk mdat");
            // if saiz/saio was previously observed, do something with the sampleinfos
            *offset += chunk_size;
            break;
        }

        default: {
            *offset += chunk_size;
            break;
        }
    }
    return OK;
}
// Parses the payload of a 'saiz' box (14496-12 8.7.12) starting at |offset|.
//
// Stores the optional aux info type / type parameter, the default sample info
// size and the sample count. When the default size is zero, the per-sample
// size table is read into mCurrentSampleInfoSizes (grown with realloc when
// needed).
//
// Returns ERROR_UNSUPPORTED for a non-zero version, ERROR_IO / ERROR_MALFORMED
// when a field cannot be read, NO_MEMORY when the table cannot be grown, and
// ERROR_IO when the per-sample table is truncated.
status_t MPEG4Source::parseSampleAuxiliaryInformationSizes(
        off64_t offset, off64_t /* size */) {
    ALOGV("parseSampleAuxiliaryInformationSizes");
    // 14496-12 8.7.12
    uint8_t version;
    if (mDataSource->readAt(
            offset, &version, sizeof(version))
            < (ssize_t)sizeof(version)) {
        return ERROR_IO;
    }

    if (version != 0) {
        return ERROR_UNSUPPORTED;
    }
    offset++;

    uint32_t flags;
    if (!mDataSource->getUInt24(offset, &flags)) {
        return ERROR_IO;
    }
    offset += 3;

    // Bit 0 of the flags signals that aux_info_type and
    // aux_info_type_parameter precede the default size.
    if (flags & 1) {
        uint32_t tmp;
        if (!mDataSource->getUInt32(offset, &tmp)) {
            return ERROR_MALFORMED;
        }
        mCurrentAuxInfoType = tmp;
        offset += 4;
        if (!mDataSource->getUInt32(offset, &tmp)) {
            return ERROR_MALFORMED;
        }
        mCurrentAuxInfoTypeParameter = tmp;
        offset += 4;
    }

    uint8_t defsize;
    if (mDataSource->readAt(offset, &defsize, 1) != 1) {
        return ERROR_MALFORMED;
    }
    mCurrentDefaultSampleInfoSize = defsize;
    offset++;

    uint32_t smplcnt;
    if (!mDataSource->getUInt32(offset, &smplcnt)) {
        return ERROR_MALFORMED;
    }
    mCurrentSampleInfoCount = smplcnt;
    offset += 4;

    if (mCurrentDefaultSampleInfoSize != 0) {
        // All samples share the default size; no per-sample table follows.
        ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize);
        return OK;
    }
    if (smplcnt == 0) {
        // Nothing to read; avoid a zero-length read into a possibly NULL table.
        return OK;
    }
    if (smplcnt > mCurrentSampleInfoAllocSize) {
        uint8_t * newPtr = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt);
        if (newPtr == NULL) {
            ALOGE("failed to realloc %u -> %u", mCurrentSampleInfoAllocSize, smplcnt);
            return NO_MEMORY;
        }
        mCurrentSampleInfoSizes = newPtr;
        mCurrentSampleInfoAllocSize = smplcnt;
    }

    // A short read would leave part of the table uninitialized, and those
    // entries are consumed later when the encryption info is parsed, so a
    // truncated table is reported as an error instead of being ignored.
    const ssize_t bytesRead =
            mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt);
    if (bytesRead < 0 || (size_t)bytesRead < smplcnt) {
        return ERROR_IO;
    }
    return OK;
}
// Parses the payload of a 'saio' box (14496-12 8.7.13) starting at |offset|.
//
// Reads the entry count and the offset table (32-bit entries for version 0,
// 64-bit entries otherwise) into mCurrentSampleInfoOffsets, then parses the
// clear/encrypted sizes found at the first offset, taken relative to the
// current moof.
status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets(
        off64_t offset, off64_t /* size */) {
    ALOGV("parseSampleAuxiliaryInformationOffsets");
    // 14496-12 8.7.13
    uint8_t version;
    if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) {
        return ERROR_IO;
    }
    offset += 1;

    uint32_t flags;
    if (!mDataSource->getUInt24(offset, &flags)) {
        return ERROR_IO;
    }
    offset += 3;

    uint32_t entrycount;
    if (!mDataSource->getUInt32(offset, &entrycount)) {
        return ERROR_IO;
    }
    offset += 4;

    if (entrycount == 0) {
        return OK;
    }
    // Each stored entry takes 8 bytes; refuse counts whose byte size would
    // not fit in 32 bits.
    if (entrycount > UINT32_MAX / 8) {
        return ERROR_MALFORMED;
    }

    if (entrycount > mCurrentSampleInfoOffsetsAllocSize) {
        void *grown = realloc(mCurrentSampleInfoOffsets, entrycount * 8);
        if (grown == NULL) {
            ALOGE("failed to realloc %u -> %u", mCurrentSampleInfoOffsetsAllocSize, entrycount * 8);
            return NO_MEMORY;
        }
        mCurrentSampleInfoOffsets = (uint64_t *)grown;
        mCurrentSampleInfoOffsetsAllocSize = entrycount;
    }
    mCurrentSampleInfoOffsetCount = entrycount;

    if (mCurrentSampleInfoOffsets == NULL) {
        return OK;
    }

    for (uint32_t idx = 0; idx < entrycount; ++idx) {
        uint64_t entry;
        if (version != 0) {
            if (!mDataSource->getUInt64(offset, &entry)) {
                return ERROR_IO;
            }
            offset += 8;
        } else {
            uint32_t narrow;
            if (!mDataSource->getUInt32(offset, &narrow)) {
                return ERROR_IO;
            }
            entry = narrow;
            offset += 4;
        }
        mCurrentSampleInfoOffsets[idx] = entry;
    }

    // parse clear/encrypted data
    // The first table entry is relative to the start of the current moof.
    off64_t auxDataOffset = mCurrentSampleInfoOffsets[0];
    auxDataOffset += mCurrentMoofOffset;

    return parseClearEncryptedSizes(auxDataOffset, false, 0);
}
// Reads per-sample encryption info (IV plus optional subsample table) starting
// at |offset| and stores it in the matching entries of mCurrentSamples.
//
// |isSubsampleEncryption| is true when called for a 'senc' box: the sample
// count is then read from the stream and bit 1 of |flags| decides whether
// subsample tables are present. Otherwise the count and sizes previously
// parsed from 'saiz' are used. Samples that already carry clear sizes are
// skipped.
status_t MPEG4Source::parseClearEncryptedSizes(off64_t offset, bool isSubsampleEncryption, uint32_t flags) {
    int ivlength;
    CHECK(mFormat.findInt32(kKeyCryptoDefaultIVSize, &ivlength));

    // only 0, 8 and 16 byte initialization vectors are supported
    const bool ivLengthSupported =
            (ivlength == 0) || (ivlength == 8) || (ivlength == 16);
    if (!ivLengthSupported) {
        ALOGW("unsupported IV length: %d", ivlength);
        return ERROR_MALFORMED;
    }

    uint32_t sampleCount = mCurrentSampleInfoCount;
    if (isSubsampleEncryption) {
        if (!mDataSource->getUInt32(offset, &sampleCount)) {
            return ERROR_IO;
        }
        offset += 4;
    }

    // read CencSampleAuxiliaryDataFormats
    for (size_t i = 0; i < sampleCount; i++) {
        if (i >= mCurrentSamples.size()) {
            ALOGW("too few samples");
            break;
        }

        Sample &sample = mCurrentSamples.editItemAt(i);
        if (!sample.clearsizes.isEmpty()) {
            // Encryption info was already recorded for this sample.
            continue;
        }

        memset(sample.iv, 0, 16);
        if (mDataSource->readAt(offset, sample.iv, ivlength) != ivlength) {
            return ERROR_IO;
        }
        offset += ivlength;

        bool hasSubsampleTable;
        if (!isSubsampleEncryption) {
            // A subsample table is present when the aux info for this sample
            // is larger than the IV alone.
            int32_t auxInfoSize = mCurrentDefaultSampleInfoSize;
            if (auxInfoSize == 0) {
                auxInfoSize = mCurrentSampleInfoSizes[i];
            }
            hasSubsampleTable = auxInfoSize > ivlength;
        } else {
            hasSubsampleTable = (flags & 2) != 0;
        }

        if (!hasSubsampleTable) {
            // No table: the sample is one fully encrypted region.
            sample.clearsizes.add(0);
            sample.encryptedsizes.add(sample.size);
            continue;
        }

        uint16_t numsubsamples;
        if (!mDataSource->getUInt16(offset, &numsubsamples)) {
            return ERROR_IO;
        }
        offset += 2;

        for (uint16_t remaining = numsubsamples; remaining > 0; --remaining) {
            uint16_t clearBytes;
            if (!mDataSource->getUInt16(offset, &clearBytes)) {
                return ERROR_IO;
            }
            offset += 2;

            uint32_t encryptedBytes;
            if (!mDataSource->getUInt32(offset, &encryptedBytes)) {
                return ERROR_IO;
            }
            offset += 4;

            sample.clearsizes.add(clearBytes);
            sample.encryptedsizes.add(encryptedBytes);
        }
    }

    return OK;
}
// Parses the payload of a 'senc' box starting at |offset|: reads the combined
// version/flags word and hands the rest to parseClearEncryptedSizes().
status_t MPEG4Source::parseSampleEncryption(off64_t offset) {
    uint32_t versionAndFlags;  // actually version + flags
    const bool haveHeader = mDataSource->getUInt32(offset, &versionAndFlags);
    if (!haveHeader) {
        return ERROR_MALFORMED;
    }
    const off64_t payloadOffset = offset + 4;
    return parseClearEncryptedSizes(
            payloadOffset, true /* isSubsampleEncryption */, versionAndFlags);
}
// Parses the payload of a 'tfhd' box of |size| bytes starting at |offset|.
//
// Always records the track id in mLastParsedTrackId. When the id matches this
// source's track, the flags and every optional field announced by them are
// stored in mTrackFragmentHeaderInfo; without an explicit base data offset the
// current moof offset is used. The running data offset is reset to 0.
//
// Returns -EINVAL when the box is too small for the announced fields or the
// version byte is non-zero, ERROR_MALFORMED when a field cannot be read.
status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) {
    if (size < 8) {
        return -EINVAL;
    }

    uint32_t flags;
    if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
        return ERROR_MALFORMED;
    }

    // Only version 0 is accepted.
    if ((flags & 0xff000000) != 0) {
        return -EINVAL;
    }

    if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) {
        return ERROR_MALFORMED;
    }

    if (mLastParsedTrackId != mTrackId) {
        // this is not the right track, skip it
        return OK;
    }

    mTrackFragmentHeaderInfo.mFlags = flags;
    mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId;
    offset += 8;
    size -= 8;

    ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID);

    const bool hasBaseDataOffset =
            (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) != 0;
    if (hasBaseDataOffset) {
        if (size < 8) {
            return -EINVAL;
        }
        if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) {
            return ERROR_MALFORMED;
        }
        offset += 8;
        size -= 8;
    }

    // Reads one optional 32-bit field into |dst| when |presentFlag| is set,
    // consuming 4 bytes of the box; does nothing when the flag is clear.
    auto readOptionalU32 = [&](uint32_t presentFlag, uint32_t *dst) -> status_t {
        if ((flags & presentFlag) == 0) {
            return OK;
        }
        if (size < 4) {
            return -EINVAL;
        }
        if (!mDataSource->getUInt32(offset, dst)) {
            return ERROR_MALFORMED;
        }
        offset += 4;
        size -= 4;
        return OK;
    };

    status_t err = readOptionalU32(
            TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent,
            &mTrackFragmentHeaderInfo.mSampleDescriptionIndex);
    if (err != OK) {
        return err;
    }

    err = readOptionalU32(
            TrackFragmentHeaderInfo::kDefaultSampleDurationPresent,
            &mTrackFragmentHeaderInfo.mDefaultSampleDuration);
    if (err != OK) {
        return err;
    }

    err = readOptionalU32(
            TrackFragmentHeaderInfo::kDefaultSampleSizePresent,
            &mTrackFragmentHeaderInfo.mDefaultSampleSize);
    if (err != OK) {
        return err;
    }

    err = readOptionalU32(
            TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent,
            &mTrackFragmentHeaderInfo.mDefaultSampleFlags);
    if (err != OK) {
        return err;
    }

    if (!hasBaseDataOffset) {
        mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset;
    }

    mTrackFragmentHeaderInfo.mDataOffset = 0;
    return OK;
}
// Parses the payload of a 'trun' box of |size| bytes starting at |offset| and
// appends one Sample per entry to mCurrentSamples.
//
// Per-sample duration, size and flags come from the run itself when the
// matching flag is set; otherwise from the defaults recorded from the track
// fragment header (and, for the duration only, from mTrex). Sample data
// offsets are assigned consecutively starting from the run's data offset, and
// the running offset is stored back in mTrackFragmentHeaderInfo.mDataOffset.
//
// Returns -EINVAL when the box is too small for what its flags and sample
// count announce or when the flags are contradictory, ERROR_MALFORMED when a
// field cannot be read.
status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) {

    ALOGV("MPEG4Extractor::parseTrackFragmentRun");
    if (size < 8) {
        return -EINVAL;
    }

    enum {
        kDataOffsetPresent                  = 0x01,
        kFirstSampleFlagsPresent            = 0x04,
        kSampleDurationPresent              = 0x100,
        kSampleSizePresent                  = 0x200,
        kSampleFlagsPresent                 = 0x400,
        kSampleCompositionTimeOffsetPresent = 0x800,
    };

    uint32_t flags;
    if (!mDataSource->getUInt32(offset, &flags)) {
        return ERROR_MALFORMED;
    }
    // |version| only affects SampleCompositionTimeOffset field.
    // If version == 0, SampleCompositionTimeOffset is uint32_t;
    // Otherwise, SampleCompositionTimeOffset is int32_t.
    // Sample.compositionOffset is defined as int32_t.
    uint8_t version = flags >> 24;
    flags &= 0xffffff;
    ALOGV("fragment run version: 0x%02x, flags: 0x%06x", version, flags);

    if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) {
        // These two shall not be used together.
        return -EINVAL;
    }

    uint32_t sampleCount;
    if (!mDataSource->getUInt32(offset + 4, &sampleCount)) {
        return ERROR_MALFORMED;
    }
    offset += 8;
    size -= 8;

    uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset;

    uint32_t firstSampleFlags = 0;

    if (flags & kDataOffsetPresent) {
        if (size < 4) {
            return -EINVAL;
        }

        // The delta is signed and relative to the fragment's base data offset.
        int32_t dataOffsetDelta;
        if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) {
            return ERROR_MALFORMED;
        }

        dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta;

        offset += 4;
        size -= 4;
    }

    if (flags & kFirstSampleFlagsPresent) {
        if (size < 4) {
            return -EINVAL;
        }

        if (!mDataSource->getUInt32(offset, &firstSampleFlags)) {
            return ERROR_MALFORMED;
        }
        offset += 4;
        size -= 4;
    }

    uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0,
             sampleCtsOffset = 0;

    size_t bytesPerSample = 0;
    if (flags & kSampleDurationPresent) {
        bytesPerSample += 4;
    } else if (mTrackFragmentHeaderInfo.mFlags
            & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
        sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
    } else if (mTrex) {
        sampleDuration = mTrex->default_sample_duration;
    }

    // For size and flags the header's stored default is used whether or not
    // the header flagged it as present.
    if (flags & kSampleSizePresent) {
        bytesPerSample += 4;
    } else {
        sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
    }

    if (flags & kSampleFlagsPresent) {
        bytesPerSample += 4;
    } else {
        sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
    }

    if (flags & kSampleCompositionTimeOffsetPresent) {
        bytesPerSample += 4;
    }

    // Compute the table size in 64 bits: sampleCount is attacker-controlled
    // and sampleCount * bytesPerSample can exceed 32 bits, which would wrap
    // where size_t is 32 bits wide and let an oversized count pass the check.
    const uint64_t tableBytes = (uint64_t)sampleCount * bytesPerSample;
    if (size < (off64_t)tableBytes) {
        return -EINVAL;
    }

    Sample tmp;
    for (uint32_t i = 0; i < sampleCount; ++i) {
        if (flags & kSampleDurationPresent) {
            if (!mDataSource->getUInt32(offset, &sampleDuration)) {
                return ERROR_MALFORMED;
            }
            offset += 4;
        }

        if (flags & kSampleSizePresent) {
            if (!mDataSource->getUInt32(offset, &sampleSize)) {
                return ERROR_MALFORMED;
            }
            offset += 4;
        }

        if (flags & kSampleFlagsPresent) {
            if (!mDataSource->getUInt32(offset, &sampleFlags)) {
                return ERROR_MALFORMED;
            }
            offset += 4;
        }

        if (flags & kSampleCompositionTimeOffsetPresent) {
            if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) {
                return ERROR_MALFORMED;
            }
            offset += 4;
        }

        ALOGV("adding sample %d at offset 0x%08" PRIx64 ", size %u, duration %u, "
              " flags 0x%08x", i + 1,
                dataOffset, sampleSize, sampleDuration,
                (flags & kFirstSampleFlagsPresent) && i == 0
                    ? firstSampleFlags : sampleFlags);
        tmp.offset = dataOffset;
        tmp.size = sampleSize;
        tmp.duration = sampleDuration;
        tmp.compositionOffset = sampleCtsOffset;
        memset(tmp.iv, 0, sizeof(tmp.iv));
        mCurrentSamples.add(tmp);

        // Samples of a run are stored back to back.
        dataOffset += sampleSize;
    }

    mTrackFragmentHeaderInfo.mDataOffset = dataOffset;

    return OK;
}
// Copies this track's format metadata into |meta| while holding mLock.
// Always returns OK.
status_t MPEG4Source::getFormat(MetaDataBase &meta) {
    {
        Mutex::Autolock autoLock(mLock);
        meta = mFormat;
    }
    return OK;
}
// Decodes the big-endian NAL length prefix at |data|. The prefix is
// mNALLengthSize (1 to 4) bytes wide; any other width aborts via CHECK.
size_t MPEG4Source::parseNALSize(const uint8_t *data) const {
    if (mNALLengthSize == 1) {
        return *data;
    }
    if (mNALLengthSize == 2) {
        return U16_AT(data);
    }
    if (mNALLengthSize == 3) {
        const size_t highByte = (size_t)data[0] << 16;
        return highByte | U16_AT(&data[1]);
    }
    if (mNALLengthSize == 4) {
        return U32_AT(data);
    }

    // This cannot happen, mNALLengthSize springs to life by adding 1 to
    // a 2-bit integer.
    CHECK(!"Should not be here.");

    return 0;
}
// Returns the next media buffer of this track in |*out|.
//
// Files with movie fragments (mFirstMoofOffset > 0) are handled entirely by
// fragmentedRead(). Otherwise the sample is located through mSampleTable (or
// mItemTable for HEIF), after applying the seek request carried in |options|,
// if any.
//
// Three output shapes exist:
//  - non AVC/HEVC tracks: the sample is returned unchanged;
//  - AVC/HEVC with mWantsNALFragments: each call returns one NAL unit as a
//    clone of the sample buffer, and the remainder stays in mBuffer for the
//    following calls;
//  - AVC/HEVC otherwise: the whole sample is returned with every NAL length
//    prefix replaced by a 00 00 00 01 start code (DRM content is passed
//    through unchanged).
//
// Returns WOULD_BLOCK for a non-blocking request when the group has no
// buffers, ERROR_END_OF_STREAM when seeking out of range, ERROR_IO on short
// reads, ERROR_MALFORMED / ERROR_BUFFER_TOO_SMALL on inconsistent sample data,
// or the error reported by the sample/item table or the buffer group.
status_t MPEG4Source::read(
MediaBufferBase **out, const ReadOptions *options) {
Mutex::Autolock autoLock(mLock);
CHECK(mStarted);
if (options != nullptr && options->getNonBlocking() && !mGroup->has_buffers()) {
*out = nullptr;
return WOULD_BLOCK;
}
// Fragmented files take a separate path.
if (mFirstMoofOffset > 0) {
return fragmentedRead(out, options);
}
*out = NULL;
// Stays -1 unless a SEEK_CLOSEST / SEEK_FRAME_INDEX seek computes a target.
int64_t targetSampleTimeUs = -1;
int64_t seekTimeUs;
ReadOptions::SeekMode mode;
if (options && options->getSeekTo(&seekTimeUs, &mode)) {
if (mIsHeif) {
CHECK(mSampleTable == NULL);
CHECK(mItemTable != NULL);
int32_t imageIndex;
if (!mFormat.findInt32(kKeyTrackID, &imageIndex)) {
return ERROR_MALFORMED;
}
status_t err;
// A negative seek time selects the thumbnail item, anything else the image.
if (seekTimeUs >= 0) {
err = mItemTable->findImageItem(imageIndex, &mCurrentSampleIndex);
} else {
err = mItemTable->findThumbnailItem(imageIndex, &mCurrentSampleIndex);
}
if (err != OK) {
return err;
}
} else {
// Translate the seek mode into the sample table's search flags.
uint32_t findFlags = 0;
switch (mode) {
case ReadOptions::SEEK_PREVIOUS_SYNC:
findFlags = SampleTable::kFlagBefore;
break;
case ReadOptions::SEEK_NEXT_SYNC:
findFlags = SampleTable::kFlagAfter;
break;
case ReadOptions::SEEK_CLOSEST_SYNC:
case ReadOptions::SEEK_CLOSEST:
findFlags = SampleTable::kFlagClosest;
break;
case ReadOptions::SEEK_FRAME_INDEX:
findFlags = SampleTable::kFlagFrameIndex;
break;
default:
CHECK(!"Should not be here.");
break;
}
uint32_t sampleIndex;
status_t err = mSampleTable->findSampleAtTime(
seekTimeUs, 1000000, mTimescale,
&sampleIndex, findFlags);
if (mode == ReadOptions::SEEK_CLOSEST
|| mode == ReadOptions::SEEK_FRAME_INDEX) {
// We found the closest sample already, now we want the sync
// sample preceding it (or the sample itself of course), even
// if the subsequent sync sample is closer.
findFlags = SampleTable::kFlagBefore;
}
uint32_t syncSampleIndex;
if (err == OK) {
err = mSampleTable->findSyncSampleNear(
sampleIndex, &syncSampleIndex, findFlags);
}
uint32_t sampleTime;
if (err == OK) {
err = mSampleTable->getMetaDataForSample(
sampleIndex, NULL, NULL, &sampleTime);
}
if (err != OK) {
if (err == ERROR_OUT_OF_RANGE) {
// An attempt to seek past the end of the stream would
// normally cause this ERROR_OUT_OF_RANGE error. Propagating
// this all the way to the MediaPlayer would cause abnormal
// termination. Legacy behaviour appears to be to behave as if
// we had seeked to the end of stream, ending normally.
err = ERROR_END_OF_STREAM;
}
ALOGV("end of stream");
return err;
}
if (mode == ReadOptions::SEEK_CLOSEST
|| mode == ReadOptions::SEEK_FRAME_INDEX) {
// Convert the found sample's time from track timescale units to microseconds.
targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale;
}
#if 0
uint32_t syncSampleTime;
CHECK_EQ(OK, mSampleTable->getMetaDataForSample(
syncSampleIndex, NULL, NULL, &syncSampleTime));
ALOGI("seek to time %lld us => sample at time %lld us, "
"sync sample at time %lld us",
seekTimeUs,
sampleTime * 1000000ll / mTimescale,
syncSampleTime * 1000000ll / mTimescale);
#endif
// Reading resumes at the sync sample, not at the sample that was found.
mCurrentSampleIndex = syncSampleIndex;
}
// Drop any partially consumed buffer left over from before the seek.
if (mBuffer != NULL) {
mBuffer->release();
mBuffer = NULL;
}
// fall through
}
off64_t offset = 0;
size_t size = 0;
// cts, stts and isSyncSample are only assigned (and only used) when a new
// buffer is fetched below.
uint32_t cts, stts;
bool isSyncSample;
bool newBuffer = false;
if (mBuffer == NULL) {
newBuffer = true;
status_t err;
if (!mIsHeif) {
err = mSampleTable->getMetaDataForSample(
mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample, &stts);
} else {
// The item index is passed only when this call carried a seek request.
err = mItemTable->getImageOffsetAndSize(
options && options->getSeekTo(&seekTimeUs, &mode) ?
&mCurrentSampleIndex : NULL, &offset, &size);
cts = stts = 0;
isSyncSample = 0;
ALOGV("image offset %lld, size %zu", (long long)offset, size);
}
if (err != OK) {
return err;
}
err = mGroup->acquire_buffer(&mBuffer);
if (err != OK) {
CHECK(mBuffer == NULL);
return err;
}
if (size > mBuffer->size()) {
ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
mBuffer->release();
mBuffer = NULL;
return ERROR_BUFFER_TOO_SMALL;
}
}
if ((!mIsAVC && !mIsHEVC) || mWantsNALFragments) {
if (newBuffer) {
// Read the whole sample straight into the output buffer and tag it.
ssize_t num_bytes_read =
mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
if (num_bytes_read < (ssize_t)size) {
mBuffer->release();
mBuffer = NULL;
return ERROR_IO;
}
CHECK(mBuffer != NULL);
mBuffer->set_range(0, size);
mBuffer->meta_data().clear();
mBuffer->meta_data().setInt64(
kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
mBuffer->meta_data().setInt64(
kKeyDuration, ((int64_t)stts * 1000000) / mTimescale);
if (targetSampleTimeUs >= 0) {
mBuffer->meta_data().setInt64(
kKeyTargetTime, targetSampleTimeUs);
}
if (isSyncSample) {
mBuffer->meta_data().setInt32(kKeyIsSyncFrame, 1);
}
++mCurrentSampleIndex;
}
if (!mIsAVC && !mIsHEVC) {
// Non-NAL content: hand the sample over as is.
*out = mBuffer;
mBuffer = NULL;
return OK;
}
// Each NAL unit is split up into its constituent fragments and
// each one of them returned in its own buffer.
CHECK(mBuffer->range_length() >= mNALLengthSize);
const uint8_t *src =
(const uint8_t *)mBuffer->data() + mBuffer->range_offset();
size_t nal_size = parseNALSize(src);
// Only logged here; the range check that follows rejects a NAL unit that
// does not fit in the remaining data.
if (mNALLengthSize > SIZE_MAX - nal_size) {
ALOGE("b/24441553, b/24445122");
}
if (mBuffer->range_length() - mNALLengthSize < nal_size) {
ALOGE("incomplete NAL unit.");
mBuffer->release();
mBuffer = NULL;
return ERROR_MALFORMED;
}
// The clone covers just this NAL unit's payload (length prefix skipped).
MediaBufferBase *clone = mBuffer->clone();
CHECK(clone != NULL);
clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);
CHECK(mBuffer != NULL);
// Advance mBuffer past the NAL unit that was just handed out.
mBuffer->set_range(
mBuffer->range_offset() + mNALLengthSize + nal_size,
mBuffer->range_length() - mNALLengthSize - nal_size);
if (mBuffer->range_length() == 0) {
mBuffer->release();
mBuffer = NULL;
}
*out = clone;
return OK;
} else {
// Whole NAL units are returned but each fragment is prefixed by
// the start code (0x00 00 00 01).
ssize_t num_bytes_read = 0;
int32_t drm = 0;
bool usesDRM = (mFormat.findInt32(kKeyIsDRM, &drm) && drm != 0);
// DRM content is read directly into the output buffer; everything else goes
// through mSrcBuffer so it can be rewritten below.
if (usesDRM) {
num_bytes_read =
mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size);
} else {
num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
}
if (num_bytes_read < (ssize_t)size) {
mBuffer->release();
mBuffer = NULL;
return ERROR_IO;
}
if (usesDRM) {
CHECK(mBuffer != NULL);
mBuffer->set_range(0, size);
} else {
uint8_t *dstData = (uint8_t *)mBuffer->data();
size_t srcOffset = 0;
size_t dstOffset = 0;
// Walk the length-prefixed NAL units in mSrcBuffer, validating each length
// against the sample size before copying.
while (srcOffset < size) {
bool isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
size_t nalLength = 0;
if (!isMalFormed) {
nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
srcOffset += mNALLengthSize;
isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength);
}
if (isMalFormed) {
ALOGE("Video is malformed");
mBuffer->release();
mBuffer = NULL;
return ERROR_MALFORMED;
}
// Empty NAL units are dropped without emitting a start code.
if (nalLength == 0) {
continue;
}
// Make sure start code plus payload fit in the output buffer without
// overflowing the offset arithmetic.
if (dstOffset > SIZE_MAX - 4 ||
dstOffset + 4 > SIZE_MAX - nalLength ||
dstOffset + 4 + nalLength > mBuffer->size()) {
ALOGE("b/27208621 : %zu %zu", dstOffset, mBuffer->size());
android_errorWriteLog(0x534e4554, "27208621");
mBuffer->release();
mBuffer = NULL;
return ERROR_MALFORMED;
}
dstData[dstOffset++] = 0;
dstData[dstOffset++] = 0;
dstData[dstOffset++] = 0;
dstData[dstOffset++] = 1;
memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
srcOffset += nalLength;
dstOffset += nalLength;
}
CHECK_EQ(srcOffset, size);
CHECK(mBuffer != NULL);
mBuffer->set_range(0, dstOffset);
}
mBuffer->meta_data().clear();
mBuffer->meta_data().setInt64(
kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
mBuffer->meta_data().setInt64(
kKeyDuration, ((int64_t)stts * 1000000) / mTimescale);
if (targetSampleTimeUs >= 0) {
mBuffer->meta_data().setInt64(
kKeyTargetTime, targetSampleTimeUs);
}
if (mIsAVC) {
uint32_t layerId = FindAVCLayerId(
(const uint8_t *)mBuffer->data(), mBuffer->range_length());
mBuffer->meta_data().setInt32(kKeyTemporalLayerId, layerId);
}
if (isSyncSample) {
mBuffer->meta_data().setInt32(kKeyIsSyncFrame, 1);
}
++mCurrentSampleIndex;
*out = mBuffer;
mBuffer = NULL;
return OK;
}
}
// read() implementation for files with movie fragments. Called by read(),
// which already holds mLock.
//
// Samples come from mCurrentSamples, which parseChunk() fills from the current
// 'moof'; when they are exhausted the fragment at mNextMoofOffset is parsed.
// A seek request in |options| re-parses the fragment chosen from the segment
// index entries in mSegments, or the first fragment when there are none.
//
// When the sample carries encryption info, the clear/encrypted sizes, IV and
// crypto parameters are attached to the buffer's metadata. The three output
// shapes (unchanged sample, one NAL unit per call, whole sample with start
// codes) match those of read().
//
// Returns ERROR_END_OF_STREAM when no further fragment or sample exists,
// ERROR_IO on short reads, ERROR_MALFORMED / ERROR_BUFFER_TOO_SMALL on
// inconsistent sample data, or the error reported by parseChunk() or the
// buffer group.
status_t MPEG4Source::fragmentedRead(
MediaBufferBase **out, const ReadOptions *options) {
ALOGV("MPEG4Source::fragmentedRead");
CHECK(mStarted);
*out = NULL;
// Never changed in this function, so kKeyTargetTime is never set below.
int64_t targetSampleTimeUs = -1;
int64_t seekTimeUs;
ReadOptions::SeekMode mode;
if (options && options->getSeekTo(&seekTimeUs, &mode)) {
int numSidxEntries = mSegments.size();
if (numSidxEntries != 0) {
// Accumulate segment durations and sizes until the segment containing the
// seek time is reached.
int64_t totalTime = 0;
off64_t totalOffset = mFirstMoofOffset;
for (int i = 0; i < numSidxEntries; i++) {
const SidxEntry *se = &mSegments[i];
if (totalTime + se->mDurationUs > seekTimeUs) {
// The requested time is somewhere in this segment
if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) ||
(mode == ReadOptions::SEEK_CLOSEST_SYNC &&
(seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) {
// requested next sync, or closest sync and it was closer to the end of
// this segment
totalTime += se->mDurationUs;
totalOffset += se->mSize;
}
break;
}
totalTime += se->mDurationUs;
totalOffset += se->mSize;
}
// Restart parsing at the chosen fragment.
mCurrentMoofOffset = totalOffset;
mNextMoofOffset = -1;
mCurrentSamples.clear();
mCurrentSampleIndex = 0;
status_t err = parseChunk(&totalOffset);
if (err != OK) {
return err;
}
// Convert the segment start time from microseconds to track timescale units.
mCurrentTime = totalTime * mTimescale / 1000000ll;
} else {
// without sidx boxes, we can only seek to 0
mCurrentMoofOffset = mFirstMoofOffset;
mNextMoofOffset = -1;
mCurrentSamples.clear();
mCurrentSampleIndex = 0;
off64_t tmp = mCurrentMoofOffset;
status_t err = parseChunk(&tmp);
if (err != OK) {
return err;
}
mCurrentTime = 0;
}
// Drop any partially consumed buffer left over from before the seek.
if (mBuffer != NULL) {
mBuffer->release();
mBuffer = NULL;
}
// fall through
}
off64_t offset = 0;
size_t size = 0;
uint32_t cts = 0;
bool isSyncSample = false;
bool newBuffer = false;
if (mBuffer == NULL) {
newBuffer = true;
if (mCurrentSampleIndex >= mCurrentSamples.size()) {
// move to next fragment if there is one
if (mNextMoofOffset <= mCurrentMoofOffset) {
return ERROR_END_OF_STREAM;
}
off64_t nextMoof = mNextMoofOffset;
mCurrentMoofOffset = nextMoof;
mCurrentSamples.clear();
mCurrentSampleIndex = 0;
status_t err = parseChunk(&nextMoof);
if (err != OK) {
return err;
}
// The new fragment may hold no samples for this track.
if (mCurrentSampleIndex >= mCurrentSamples.size()) {
return ERROR_END_OF_STREAM;
}
}
const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
offset = smpl->offset;
size = smpl->size;
cts = mCurrentTime + smpl->compositionOffset;
mCurrentTime += smpl->duration;
// Only the first sample of a fragment is reported as a sync sample.
isSyncSample = (mCurrentSampleIndex == 0); // XXX
status_t err = mGroup->acquire_buffer(&mBuffer);
if (err != OK) {
CHECK(mBuffer == NULL);
ALOGV("acquire_buffer returned %d", err);
return err;
}
if (size > mBuffer->size()) {
ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
mBuffer->release();
mBuffer = NULL;
return ERROR_BUFFER_TOO_SMALL;
}
}
// NOTE(review): when mBuffer was left over from a previous call (NAL fragment
// mode), mCurrentSampleIndex has already been incremented by that call, so
// this lookup refers to the sample after the one held in mBuffer and is not
// range-checked here - confirm this is intended.
const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
MetaDataBase &bufmeta = mBuffer->meta_data();
bufmeta.clear();
if (smpl->encryptedsizes.size()) {
// store clear/encrypted lengths in metadata
bufmeta.setData(kKeyPlainSizes, 0,
smpl->clearsizes.array(), smpl->clearsizes.size() * 4);
bufmeta.setData(kKeyEncryptedSizes, 0,
smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4);
bufmeta.setInt32(kKeyCryptoDefaultIVSize, mDefaultIVSize);
bufmeta.setInt32(kKeyCryptoMode, mCryptoMode);
bufmeta.setData(kKeyCryptoKey, 0, mCryptoKey, 16);
bufmeta.setInt32(kKeyEncryptedByteBlock, mDefaultEncryptedByteBlock);
bufmeta.setInt32(kKeySkipByteBlock, mDefaultSkipByteBlock);
uint32_t type = 0;
const void *iv = NULL;
size_t ivlength = 0;
// An IV stored in the track format takes precedence over the per-sample IV.
if (!mFormat.findData(
kKeyCryptoIV, &type, &iv, &ivlength)) {
iv = smpl->iv;
ivlength = 16; // use 16 or the actual size?
}
bufmeta.setData(kKeyCryptoIV, 0, iv, ivlength);
}
if ((!mIsAVC && !mIsHEVC)|| mWantsNALFragments) {
if (newBuffer) {
if (!isInRange((size_t)0u, mBuffer->size(), size)) {
mBuffer->release();
mBuffer = NULL;
ALOGE("fragmentedRead ERROR_MALFORMED size %zu", size);
return ERROR_MALFORMED;
}
// Read the whole sample straight into the output buffer and tag it.
ssize_t num_bytes_read =
mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
if (num_bytes_read < (ssize_t)size) {
mBuffer->release();
mBuffer = NULL;
ALOGE("i/o error");
return ERROR_IO;
}
CHECK(mBuffer != NULL);
mBuffer->set_range(0, size);
mBuffer->meta_data().setInt64(
kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
mBuffer->meta_data().setInt64(
kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale);
if (targetSampleTimeUs >= 0) {
mBuffer->meta_data().setInt64(
kKeyTargetTime, targetSampleTimeUs);
}
if (mIsAVC) {
uint32_t layerId = FindAVCLayerId(
(const uint8_t *)mBuffer->data(), mBuffer->range_length());
mBuffer->meta_data().setInt32(kKeyTemporalLayerId, layerId);
}
if (isSyncSample) {
mBuffer->meta_data().setInt32(kKeyIsSyncFrame, 1);
}
++mCurrentSampleIndex;
}
if (!mIsAVC && !mIsHEVC) {
// Non-NAL content: hand the sample over as is.
*out = mBuffer;
mBuffer = NULL;
return OK;
}
// Each NAL unit is split up into its constituent fragments and
// each one of them returned in its own buffer.
CHECK(mBuffer->range_length() >= mNALLengthSize);
const uint8_t *src =
(const uint8_t *)mBuffer->data() + mBuffer->range_offset();
size_t nal_size = parseNALSize(src);
// Only logged here; the range check that follows rejects a NAL unit that
// does not fit in the remaining data.
if (mNALLengthSize > SIZE_MAX - nal_size) {
ALOGE("b/24441553, b/24445122");
}
if (mBuffer->range_length() - mNALLengthSize < nal_size) {
ALOGE("incomplete NAL unit.");
mBuffer->release();
mBuffer = NULL;
return ERROR_MALFORMED;
}
// The clone covers just this NAL unit's payload (length prefix skipped).
MediaBufferBase *clone = mBuffer->clone();
CHECK(clone != NULL);
clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);
CHECK(mBuffer != NULL);
// Advance mBuffer past the NAL unit that was just handed out.
mBuffer->set_range(
mBuffer->range_offset() + mNALLengthSize + nal_size,
mBuffer->range_length() - mNALLengthSize - nal_size);
if (mBuffer->range_length() == 0) {
mBuffer->release();
mBuffer = NULL;
}
*out = clone;
return OK;
} else {
ALOGV("whole NAL");
// Whole NAL units are returned but each fragment is prefixed by
// the start code (0x00 00 00 01).
ssize_t num_bytes_read = 0;
int32_t drm = 0;
bool usesDRM = (mFormat.findInt32(kKeyIsDRM, &drm) && drm != 0);
// Destination of the raw read: the output buffer for DRM content, the
// scratch buffer otherwise. The sample size is validated against the
// chosen destination's capacity first.
void *data = NULL;
bool isMalFormed = false;
if (usesDRM) {
if (mBuffer == NULL || !isInRange((size_t)0u, mBuffer->size(), size)) {
isMalFormed = true;
} else {
data = mBuffer->data();
}
} else {
int32_t max_size;
if (!mFormat.findInt32(kKeyMaxInputSize, &max_size)
|| !isInRange((size_t)0u, (size_t)max_size, size)) {
isMalFormed = true;
} else {
data = mSrcBuffer;
}
}
if (isMalFormed || data == NULL) {
ALOGE("isMalFormed size %zu", size);
if (mBuffer != NULL) {
mBuffer->release();
mBuffer = NULL;
}
return ERROR_MALFORMED;
}
num_bytes_read = mDataSource->readAt(offset, data, size);
if (num_bytes_read < (ssize_t)size) {
mBuffer->release();
mBuffer = NULL;
ALOGE("i/o error");
return ERROR_IO;
}
if (usesDRM) {
CHECK(mBuffer != NULL);
mBuffer->set_range(0, size);
} else {
uint8_t *dstData = (uint8_t *)mBuffer->data();
size_t srcOffset = 0;
size_t dstOffset = 0;
// Walk the length-prefixed NAL units in mSrcBuffer, validating each length
// against both the sample size and the output buffer before copying.
while (srcOffset < size) {
isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
size_t nalLength = 0;
if (!isMalFormed) {
nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
srcOffset += mNALLengthSize;
isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength)
|| !isInRange((size_t)0u, mBuffer->size(), dstOffset, (size_t)4u)
|| !isInRange((size_t)0u, mBuffer->size(), dstOffset + 4, nalLength);
}
if (isMalFormed) {
ALOGE("Video is malformed; nalLength %zu", nalLength);
mBuffer->release();
mBuffer = NULL;
return ERROR_MALFORMED;
}
// Empty NAL units are dropped without emitting a start code.
if (nalLength == 0) {
continue;
}
// Make sure start code plus payload fit in the output buffer without
// overflowing the offset arithmetic.
if (dstOffset > SIZE_MAX - 4 ||
dstOffset + 4 > SIZE_MAX - nalLength ||
dstOffset + 4 + nalLength > mBuffer->size()) {
ALOGE("b/26365349 : %zu %zu", dstOffset, mBuffer->size());
android_errorWriteLog(0x534e4554, "26365349");
mBuffer->release();
mBuffer = NULL;
return ERROR_MALFORMED;
}
dstData[dstOffset++] = 0;
dstData[dstOffset++] = 0;
dstData[dstOffset++] = 0;
dstData[dstOffset++] = 1;
memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
srcOffset += nalLength;
dstOffset += nalLength;
}
CHECK_EQ(srcOffset, size);
CHECK(mBuffer != NULL);
mBuffer->set_range(0, dstOffset);
}
mBuffer->meta_data().setInt64(
kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
mBuffer->meta_data().setInt64(
kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale);
if (targetSampleTimeUs >= 0) {
mBuffer->meta_data().setInt64(
kKeyTargetTime, targetSampleTimeUs);
}
if (isSyncSample) {
mBuffer->meta_data().setInt32(kKeyIsSyncFrame, 1);
}
++mCurrentSampleIndex;
*out = mBuffer;
mBuffer = NULL;
return OK;
}
}
// Returns the first track in the track list whose MIME type starts with
// |mimePrefix| (compared case-insensitively), or NULL when none matches.
// Tracks without a MIME type entry are skipped.
MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix(
        const char *mimePrefix) {
    Track *candidate = mFirstTrack;
    while (candidate != NULL) {
        const char *mime;
        const bool matches = candidate->meta.findCString(kKeyMIMEType, &mime)
                && strncasecmp(mime, mimePrefix, strlen(mimePrefix)) == 0;
        if (matches) {
            return candidate;
        }
        candidate = candidate->next;
    }

    return NULL;
}
// Quick sniff: reads the 8 bytes at file offset 4 and compares them against a
// fixed list of "ftyp" + major brand signatures. On a match sets |*confidence|
// to 0.4 and returns true; returns false on a short read or no match.
static bool LegacySniffMPEG4(DataSourceBase *source, float *confidence) {
    uint8_t header[8];

    const ssize_t bytesRead = source->readAt(4, header, sizeof(header));
    if (bytesRead < (ssize_t)sizeof(header)) {
        return false;
    }

    // Signature and the number of leading bytes that must match. The first
    // entry deliberately compares only 7 bytes.
    static const struct {
        const char *signature;
        size_t length;
    } kKnownSignatures[] = {
        { "ftyp3gp", 7 },  { "ftypmp42", 8 },
        { "ftyp3gr6", 8 }, { "ftyp3gs6", 8 },
        { "ftyp3ge6", 8 }, { "ftyp3gg6", 8 },
        { "ftypisom", 8 }, { "ftypM4V ", 8 },
        { "ftypM4A ", 8 }, { "ftypf4v ", 8 },
        { "ftypkddi", 8 }, { "ftypM4VP", 8 },
        { "ftypmif1", 8 }, { "ftypheic", 8 },
        { "ftypmsf1", 8 }, { "ftyphevc", 8 },
    };

    for (const auto &known : kKnownSignatures) {
        if (memcmp(header, known.signature, known.length) == 0) {
            *confidence = 0.4;
            return true;
        }
    }

    return false;
}
// Returns true when |fourcc| is one of the brands this extractor is willing
// to try to parse.
static bool isCompatibleBrand(uint32_t fourcc) {
    static const uint32_t kCompatibleBrands[] = {
        FOURCC('i', 's', 'o', 'm'),
        FOURCC('i', 's', 'o', '2'),
        FOURCC('a', 'v', 'c', '1'),
        FOURCC('h', 'v', 'c', '1'),
        FOURCC('h', 'e', 'v', '1'),
        FOURCC('3', 'g', 'p', '4'),
        FOURCC('m', 'p', '4', '1'),
        FOURCC('m', 'p', '4', '2'),
        FOURCC('d', 'a', 's', 'h'),

        // Won't promise that the following file types can be played.
        // Just give these file types a chance.
        FOURCC('q', 't', ' ', ' '),  // Apple's QuickTime
        FOURCC('M', 'S', 'N', 'V'),  // Sony's PSP
        FOURCC('3', 'g', '2', 'a'),  // 3GPP2
        FOURCC('3', 'g', '2', 'b'),
        FOURCC('m', 'i', 'f', '1'),  // HEIF image
        FOURCC('h', 'e', 'i', 'c'),  // HEIF image
        FOURCC('m', 's', 'f', '1'),  // HEIF image sequence
        FOURCC('h', 'e', 'v', 'c'),  // HEIF image sequence
    };

    for (const uint32_t brand : kCompatibleBrands) {
        if (brand == fourcc) {
            return true;
        }
    }

    return false;
}
// Attempt to actually parse the 'ftyp' atom and determine if a suitable
// compatible brand is present.
//
// Top-level atoms are walked from offset 0. The walk ends at the first 'moov'
// atom, or as soon as the next atom would start at or beyond kMaxScanOffset.
// For an 'ftyp' atom the major brand and every compatible brand are checked
// with isCompatibleBrand(); an 'ftyp' atom seen before any acceptable brand
// has been found, and which itself contains none, fails the sniff at once.
//
// Returns true and sets |*confidence| to 0.4 only if an acceptable brand was
// found. Returns false, leaving |*confidence| untouched, on any short read,
// on a malformed atom size, or when no acceptable brand was seen.
static bool BetterSniffMPEG4(DataSourceBase *source, float *confidence) {
    // We scan up to 128 bytes to identify this file as an MP4.
    static const off64_t kMaxScanOffset = 128ll;

    off64_t offset = 0ll;
    bool foundGoodFileType = false;
    bool done = false;

    while (!done && offset < kMaxScanOffset) {
        // Atom header: 32-bit big-endian size followed by the 32-bit type.
        uint32_t hdr[2];
        if (source->readAt(offset, hdr, 8) < 8) {
            return false;
        }

        uint64_t chunkSize = ntohl(hdr[0]);
        uint32_t chunkType = ntohl(hdr[1]);
        off64_t chunkDataOffset = offset + 8;

        if (chunkSize == 1) {
            // A size field of 1 means the real size is the 64-bit big-endian
            // value stored right after the type.
            if (source->readAt(offset + 8, &chunkSize, 8) < 8) {
                return false;
            }
            chunkSize = ntoh64(chunkSize);
            chunkDataOffset += 8;

            if (chunkSize < 16) {
                // The smallest valid chunk is 16 bytes long in this case.
                return false;
            }
        } else if (chunkSize < 8) {
            // The smallest valid chunk is 8 bytes long.
            return false;
        }

        // (data_offset - offset) is either 8 or 16
        off64_t chunkDataSize = chunkSize - (chunkDataOffset - offset);
        if (chunkDataSize < 0) {
            ALOGE("b/23540914");
            return false;
        }

        char chunkstring[5];
        MakeFourCCString(chunkType, chunkstring);
        ALOGV("saw chunk type %s, size %" PRIu64 " @ %lld", chunkstring, chunkSize, (long long)offset);

        switch (chunkType) {
            case FOURCC('f', 't', 'y', 'p'):
            {
                // Payload layout: major brand (4 bytes), minor version
                // (4 bytes), then zero or more 4-byte compatible brands.
                if (chunkDataSize < 8) {
                    return false;
                }

                uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4;
                for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
                    if (i == 1) {
                        // Skip this index, it refers to the minorVersion,
                        // not a brand.
                        continue;
                    }

                    uint32_t brand;
                    if (source->readAt(
                                chunkDataOffset + 4 * i, &brand, 4) < 4) {
                        return false;
                    }

                    brand = ntohl(brand);

                    if (isCompatibleBrand(brand)) {
                        foundGoodFileType = true;
                        break;
                    }
                }

                if (!foundGoodFileType) {
                    return false;
                }

                break;
            }

            case FOURCC('m', 'o', 'o', 'v'):
            {
                // Nothing past 'moov' is of interest to the sniffer.
                done = true;
                break;
            }

            default:
                break;
        }

        if (!done) {
            // A 64-bit atom size can be large enough that adding it to the
            // scan offset no longer fits in off64_t. Reject such an atom
            // instead of letting the offset wrap and reading from a negative
            // position on the next pass. |offset| is never negative here, so
            // the subtraction below cannot overflow.
            if (chunkSize > static_cast<uint64_t>(INT64_MAX - offset)) {
                return false;
            }
            offset += chunkSize;
        }
    }

    if (!foundGoodFileType) {
        return false;
    }

    *confidence = 0.4f;

    return true;
}
// Creator callback handed out by Sniff(): allocates a new MPEG4Extractor for
// |source| and returns it. The second argument is unused.
static MediaExtractor* CreateExtractor(DataSourceBase *source, void *) {
    MediaExtractor *extractor = new MPEG4Extractor(source);
    return extractor;
}
// Sniffer entry point for this extractor. BetterSniffMPEG4() is tried first;
// only if it rejects the source is LegacySniffMPEG4() consulted, and a warning
// is logged when that fallback is what accepted the file.
//
// Returns CreateExtractor when either sniffer accepts |source| (the accepting
// sniffer has then written |*confidence|), or NULL when both reject it. The
// two trailing parameters are unused.
static MediaExtractor::CreatorFunc Sniff(
        DataSourceBase *source, float *confidence, void **,
        MediaExtractor::FreeMetaFunc *) {
    const bool acceptedByAtomScan = BetterSniffMPEG4(source, confidence);
    if (!acceptedByAtomScan) {
        if (!LegacySniffMPEG4(source, confidence)) {
            return NULL;
        }
        ALOGW("Identified supported mpeg4 through LegacySniffMPEG4.");
    }
    return CreateExtractor;
}
extern "C" {
// This is the only symbol that needs to be exported
// It is given C linkage and default visibility, and returns the definition
// record for this extractor: the definition-structure version constant, this
// extractor's UUID, its version number, its display name, and the Sniff
// function defined above in this file.
__attribute__ ((visibility ("default")))
MediaExtractor::ExtractorDef GETEXTRACTORDEF() {
return {
MediaExtractor::EXTRACTORDEF_VERSION,
UUID("27575c67-4417-4c54-8d3d-8e626985a164"),
1, // version
"MP4 Extractor",
Sniff
};
}
} // extern "C"
} // namespace android