// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* * * Copyright (C) 2003-2015, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* * file name: pkgitems.cpp * encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * * created on: 2005sep18 * created by: Markus W. Scherer * * Companion file to package.cpp. Deals with details of ICU data item formats. * Used for item dependencies. * Contains adapted code from ucnv_bld.c (swapper code from 2003). */ #include "unicode/utypes.h" #include "unicode/ures.h" #include "unicode/putil.h" #include "unicode/udata.h" #include "cstring.h" #include "uinvchar.h" #include "ucmndata.h" #include "udataswp.h" #include "swapimpl.h" #include "toolutil.h" #include "package.h" #include "pkg_imp.h" #include <stdio.h> #include <stdlib.h> #include <string.h> /* item formats in common */ #include "uresdata.h" #include "ucnv_bld.h" #include "ucnv_io.h" // general definitions ----------------------------------------------------- *** U_CDECL_BEGIN static void U_CALLCONV printError(void *context, const char *fmt, va_list args) { vfprintf((FILE *)context, fmt, args); } U_CDECL_END // a data item in native-platform form ------------------------------------- *** U_NAMESPACE_BEGIN class NativeItem { public: NativeItem() : pItem(NULL), pInfo(NULL), bytes(NULL), swapped(NULL), length(0) {} NativeItem(const Item *item, UDataSwapFn *swap) : swapped(NULL) { setItem(item, swap); } ~NativeItem() { delete [] swapped; } const UDataInfo *getDataInfo() const { return pInfo; } const uint8_t *getBytes() const { return bytes; } int32_t getLength() const { return length; } void setItem(const Item *item, UDataSwapFn *swap) { pItem=item; int32_t infoLength, itemHeaderLength; UErrorCode errorCode=U_ZERO_ERROR; pInfo=::getDataInfo(pItem->data, pItem->length, infoLength, itemHeaderLength, &errorCode); if(U_FAILURE(errorCode)) { exit(errorCode); // should succeed because readFile() checks headers } length=pItem->length-itemHeaderLength; if(pInfo->isBigEndian==U_IS_BIG_ENDIAN && pInfo->charsetFamily==U_CHARSET_FAMILY) { bytes=pItem->data+itemHeaderLength; } else { UDataSwapper *ds=udata_openSwapper((UBool)pInfo->isBigEndian, pInfo->charsetFamily, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode); if(U_FAILURE(errorCode)) { fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n", pItem->name, u_errorName(errorCode)); exit(errorCode); } ds->printError=printError; ds->printErrorContext=stderr; swapped=new uint8_t[pItem->length]; if(swapped==NULL) { fprintf(stderr, "icupkg: unable to allocate memory for swapping \"%s\"\n", pItem->name); exit(U_MEMORY_ALLOCATION_ERROR); } swap(ds, pItem->data, pItem->length, swapped, &errorCode); pInfo=::getDataInfo(swapped, pItem->length, infoLength, itemHeaderLength, &errorCode); bytes=swapped+itemHeaderLength; udata_closeSwapper(ds); } } private: const Item *pItem; const UDataInfo *pInfo; const uint8_t *bytes; uint8_t *swapped; int32_t length; }; // check a dependency ------------------------------------------------------ *** /* * assemble the target item name from the source item name, an ID * and a suffix */ static void makeTargetName(const char *itemName, const char *id, int32_t idLength, const char *suffix, char *target, int32_t capacity, UErrorCode *pErrorCode) { const char *itemID; int32_t treeLength, suffixLength, targetLength; // get the item basename itemID=strrchr(itemName, '/'); if(itemID!=NULL) { ++itemID; } else { itemID=itemName; } // build the target string treeLength=(int32_t)(itemID-itemName); if(idLength<0) { idLength=(int32_t)strlen(id); } suffixLength=(int32_t)strlen(suffix); targetLength=treeLength+idLength+suffixLength; if(targetLength>=capacity) { fprintf(stderr, "icupkg/makeTargetName(%s) target item name length %ld too long\n", itemName, (long)targetLength); *pErrorCode=U_BUFFER_OVERFLOW_ERROR; return; } memcpy(target, itemName, treeLength); memcpy(target+treeLength, id, idLength); memcpy(target+treeLength+idLength, suffix, suffixLength+1); // +1 includes the terminating NUL } static void checkIDSuffix(const char *itemName, const char *id, int32_t idLength, const char *suffix, CheckDependency check, void *context, UErrorCode *pErrorCode) { char target[200]; makeTargetName(itemName, id, idLength, suffix, target, (int32_t)sizeof(target), pErrorCode); if(U_SUCCESS(*pErrorCode)) { check(context, itemName, target); } } /* assemble the target item name from the item's parent item name */ static void checkParent(const char *itemName, CheckDependency check, void *context, UErrorCode *pErrorCode) { const char *itemID, *parent, *parentLimit, *suffix; int32_t parentLength; // get the item basename itemID=strrchr(itemName, '/'); if(itemID!=NULL) { ++itemID; } else { itemID=itemName; } // get the item suffix suffix=strrchr(itemID, '.'); if(suffix==NULL) { // empty suffix, point to the end of the string suffix=strrchr(itemID, 0); } // get the position of the last '_' for(parentLimit=suffix; parentLimit>itemID && *--parentLimit!='_';) {} if(parentLimit!=itemID) { // get the parent item name by truncating the last part of this item's name */ parent=itemID; parentLength=(int32_t)(parentLimit-itemID); } else { // no '_' in the item name: the parent is the root bundle parent="root"; parentLength=4; if((suffix-itemID)==parentLength && 0==memcmp(itemID, parent, parentLength)) { // the item itself is "root", which does not depend on a parent return; } } checkIDSuffix(itemName, parent, parentLength, suffix, check, context, pErrorCode); } // get dependencies from resource bundles ---------------------------------- *** static const UChar SLASH=0x2f; /* * Check for the alias from the string or alias resource res. */ static void checkAlias(const char *itemName, Resource res, const UChar *alias, int32_t length, UBool useResSuffix, CheckDependency check, void *context, UErrorCode *pErrorCode) { int32_t i; if(!uprv_isInvariantUString(alias, length)) { fprintf(stderr, "icupkg/ures_enumDependencies(%s res=%08x) alias string contains non-invariant characters\n", itemName, res); *pErrorCode=U_INVALID_CHAR_FOUND; return; } // extract the locale ID from alias strings like // locale_ID/key1/key2/key3 // locale_ID // search for the first slash for(i=0; i<length && alias[i]!=SLASH; ++i) {} if(res_getPublicType(res)==URES_ALIAS) { // ignore aliases with an initial slash: // /ICUDATA/... and /pkgname/... go to a different package // /LOCALE/... are for dynamic sideways fallbacks and don't go to a fixed bundle if(i==0) { return; // initial slash ('/') } // ignore the intra-bundle path starting from the first slash ('/') length=i; } else /* URES_STRING */ { // the whole string should only consist of a locale ID if(i!=length) { fprintf(stderr, "icupkg/ures_enumDependencies(%s res=%08x) %%ALIAS contains a '/'\n", itemName, res); *pErrorCode=U_UNSUPPORTED_ERROR; return; } } // convert the Unicode string to char * char localeID[32]; if(length>=(int32_t)sizeof(localeID)) { fprintf(stderr, "icupkg/ures_enumDependencies(%s res=%08x) alias locale ID length %ld too long\n", itemName, res, (long)length); *pErrorCode=U_BUFFER_OVERFLOW_ERROR; return; } u_UCharsToChars(alias, localeID, length); localeID[length]=0; checkIDSuffix(itemName, localeID, -1, (useResSuffix ? ".res" : ""), check, context, pErrorCode); } /* * Enumerate one resource item and its children and extract dependencies from * aliases. */ static void ures_enumDependencies(const char *itemName, const ResourceData *pResData, Resource res, const char *inKey, const char *parentKey, int32_t depth, CheckDependency check, void *context, Package *pkg, UErrorCode *pErrorCode) { switch(res_getPublicType(res)) { case URES_STRING: { UBool useResSuffix = TRUE; // Check for %%ALIAS if(depth==1 && inKey!=NULL) { if(0!=strcmp(inKey, "%%ALIAS")) { break; } } // Check for %%DEPENDENCY else if(depth==2 && parentKey!=NULL) { if(0!=strcmp(parentKey, "%%DEPENDENCY")) { break; } useResSuffix = FALSE; } else { // we ignore all other strings break; } int32_t length; const UChar *alias=res_getString(pResData, res, &length); checkAlias(itemName, res, alias, length, useResSuffix, check, context, pErrorCode); } break; case URES_ALIAS: { int32_t length; const UChar *alias=res_getAlias(pResData, res, &length); checkAlias(itemName, res, alias, length, TRUE, check, context, pErrorCode); } break; case URES_TABLE: { /* recurse */ int32_t count=res_countArrayItems(pResData, res); for(int32_t i=0; i<count; ++i) { const char *itemKey; Resource item=res_getTableItemByIndex(pResData, res, i, &itemKey); ures_enumDependencies( itemName, pResData, item, itemKey, inKey, depth+1, check, context, pkg, pErrorCode); if(U_FAILURE(*pErrorCode)) { fprintf(stderr, "icupkg/ures_enumDependencies(%s table res=%08x)[%d].recurse(%s: %08x) failed\n", itemName, res, i, itemKey, item); break; } } } break; case URES_ARRAY: { /* recurse */ int32_t count=res_countArrayItems(pResData, res); for(int32_t i=0; i<count; ++i) { Resource item=res_getArrayItem(pResData, res, i); ures_enumDependencies( itemName, pResData, item, NULL, inKey, depth+1, check, context, pkg, pErrorCode); if(U_FAILURE(*pErrorCode)) { fprintf(stderr, "icupkg/ures_enumDependencies(%s array res=%08x)[%d].recurse(%08x) failed\n", itemName, res, i, item); break; } } } break; default: break; } } static void ures_enumDependencies(const char *itemName, const UDataInfo *pInfo, const uint8_t *inBytes, int32_t length, CheckDependency check, void *context, Package *pkg, UErrorCode *pErrorCode) { ResourceData resData; res_read(&resData, pInfo, inBytes, length, pErrorCode); if(U_FAILURE(*pErrorCode)) { fprintf(stderr, "icupkg: .res format version %02x.%02x not supported, or bundle malformed\n", pInfo->formatVersion[0], pInfo->formatVersion[1]); exit(U_UNSUPPORTED_ERROR); } /* * if the bundle attributes are present and the nofallback flag is not set, * then add the parent bundle as a dependency */ if(pInfo->formatVersion[0]>1 || (pInfo->formatVersion[0]==1 && pInfo->formatVersion[1]>=1)) { if(!resData.noFallback) { /* this bundle participates in locale fallback */ checkParent(itemName, check, context, pErrorCode); } } icu::NativeItem nativePool; if(resData.usesPoolBundle) { char poolName[200]; makeTargetName(itemName, "pool", 4, ".res", poolName, (int32_t)sizeof(poolName), pErrorCode); if(U_FAILURE(*pErrorCode)) { return; } check(context, itemName, poolName); int32_t index=pkg->findItem(poolName); if(index<0) { // We cannot work with a bundle if its pool resource is missing. // check() already printed a complaint. return; } // TODO: Cache the native version in the Item itself. nativePool.setItem(pkg->getItem(index), ures_swap); const UDataInfo *poolInfo=nativePool.getDataInfo(); if(poolInfo->formatVersion[0]<=1) { fprintf(stderr, "icupkg: %s is not a pool bundle\n", poolName); return; } const int32_t *poolRoot=(const int32_t *)nativePool.getBytes(); const int32_t *poolIndexes=poolRoot+1; int32_t poolIndexLength=poolIndexes[URES_INDEX_LENGTH]&0xff; if(!(poolIndexLength>URES_INDEX_POOL_CHECKSUM && (poolIndexes[URES_INDEX_ATTRIBUTES]&URES_ATT_IS_POOL_BUNDLE)) ) { fprintf(stderr, "icupkg: %s is not a pool bundle\n", poolName); return; } if(resData.pRoot[1+URES_INDEX_POOL_CHECKSUM]==poolIndexes[URES_INDEX_POOL_CHECKSUM]) { resData.poolBundleKeys=(const char *)(poolIndexes+poolIndexLength); resData.poolBundleStrings=(const uint16_t *)(poolRoot+poolIndexes[URES_INDEX_KEYS_TOP]); } else { fprintf(stderr, "icupkg: %s has mismatched checksum for %s\n", poolName, itemName); return; } } ures_enumDependencies( itemName, &resData, resData.rootRes, NULL, NULL, 0, check, context, pkg, pErrorCode); } // get dependencies from conversion tables --------------------------------- *** /* code adapted from ucnv_swap() */ static void ucnv_enumDependencies(const UDataSwapper *ds, const char *itemName, const UDataInfo *pInfo, const uint8_t *inBytes, int32_t length, CheckDependency check, void *context, UErrorCode *pErrorCode) { uint32_t staticDataSize; const UConverterStaticData *inStaticData; const _MBCSHeader *inMBCSHeader; uint8_t outputType; /* check format version */ if(!( pInfo->formatVersion[0]==6 && pInfo->formatVersion[1]>=2 )) { fprintf(stderr, "icupkg/ucnv_enumDependencies(): .cnv format version %02x.%02x not supported\n", pInfo->formatVersion[0], pInfo->formatVersion[1]); exit(U_UNSUPPORTED_ERROR); } /* read the initial UConverterStaticData structure after the UDataInfo header */ inStaticData=(const UConverterStaticData *)inBytes; if( length<(int32_t)sizeof(UConverterStaticData) || (uint32_t)length<(staticDataSize=ds->readUInt32(inStaticData->structSize)) ) { udata_printError(ds, "icupkg/ucnv_enumDependencies(): too few bytes (%d after header) for an ICU .cnv conversion table\n", length); *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return; } inBytes+=staticDataSize; length-=(int32_t)staticDataSize; /* check for supported conversionType values */ if(inStaticData->conversionType==UCNV_MBCS) { /* MBCS data */ uint32_t mbcsHeaderLength, mbcsHeaderFlags, mbcsHeaderOptions; int32_t extOffset; inMBCSHeader=(const _MBCSHeader *)inBytes; if(length<(int32_t)sizeof(_MBCSHeader)) { udata_printError(ds, "icupkg/ucnv_enumDependencies(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n", length); *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return; } if(inMBCSHeader->version[0]==4 && inMBCSHeader->version[1]>=1) { mbcsHeaderLength=MBCS_HEADER_V4_LENGTH; } else if(inMBCSHeader->version[0]==5 && inMBCSHeader->version[1]>=3 && ((mbcsHeaderOptions=ds->readUInt32(inMBCSHeader->options))& MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0 ) { mbcsHeaderLength=mbcsHeaderOptions&MBCS_OPT_LENGTH_MASK; } else { udata_printError(ds, "icupkg/ucnv_enumDependencies(): unsupported _MBCSHeader.version %d.%d\n", inMBCSHeader->version[0], inMBCSHeader->version[1]); *pErrorCode=U_UNSUPPORTED_ERROR; return; } mbcsHeaderFlags=ds->readUInt32(inMBCSHeader->flags); extOffset=(int32_t)(mbcsHeaderFlags>>8); outputType=(uint8_t)mbcsHeaderFlags; if(outputType==MBCS_OUTPUT_EXT_ONLY) { /* * extension-only file, * contains a base name instead of normal base table data */ char baseName[32]; int32_t baseNameLength; /* there is extension data after the base data, see ucnv_ext.h */ if(length<(extOffset+UCNV_EXT_INDEXES_MIN_LENGTH*4)) { udata_printError(ds, "icupkg/ucnv_enumDependencies(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table with extension data\n", length); *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return; } /* swap the base name, between the header and the extension data */ const char *inBaseName=(const char *)inBytes+mbcsHeaderLength*4; baseNameLength=(int32_t)strlen(inBaseName); if(baseNameLength>=(int32_t)sizeof(baseName)) { udata_printError(ds, "icupkg/ucnv_enumDependencies(%s): base name length %ld too long\n", itemName, baseNameLength); *pErrorCode=U_UNSUPPORTED_ERROR; return; } ds->swapInvChars(ds, inBaseName, baseNameLength+1, baseName, pErrorCode); checkIDSuffix(itemName, baseName, -1, ".cnv", check, context, pErrorCode); } } } // ICU data formats -------------------------------------------------------- *** static const struct { uint8_t dataFormat[4]; } dataFormats[]={ { { 0x52, 0x65, 0x73, 0x42 } }, /* dataFormat="ResB" */ { { 0x63, 0x6e, 0x76, 0x74 } }, /* dataFormat="cnvt" */ { { 0x43, 0x76, 0x41, 0x6c } } /* dataFormat="CvAl" */ }; enum { FMT_RES, FMT_CNV, FMT_ALIAS, FMT_COUNT }; static int32_t getDataFormat(const uint8_t dataFormat[4]) { int32_t i; for(i=0; i<FMT_COUNT; ++i) { if(0==memcmp(dataFormats[i].dataFormat, dataFormat, 4)) { return i; } } return -1; } // enumerate dependencies of a package item -------------------------------- *** void Package::enumDependencies(Item *pItem, void *context, CheckDependency check) { int32_t infoLength, itemHeaderLength; UErrorCode errorCode=U_ZERO_ERROR; const UDataInfo *pInfo=getDataInfo(pItem->data, pItem->length, infoLength, itemHeaderLength, &errorCode); if(U_FAILURE(errorCode)) { return; // should not occur because readFile() checks headers } // find the data format and call the corresponding function, if any int32_t format=getDataFormat(pInfo->dataFormat); if(format>=0) { switch(format) { case FMT_RES: { /* * Swap the resource bundle (if necessary) so that we can use * the normal runtime uresdata.c code to read it. * We do not want to duplicate that code, especially not together with on-the-fly swapping. */ NativeItem nrb(pItem, ures_swap); ures_enumDependencies(pItem->name, nrb.getDataInfo(), nrb.getBytes(), nrb.getLength(), check, context, this, &errorCode); break; } case FMT_CNV: { // TODO: share/cache swappers UDataSwapper *ds=udata_openSwapper( (UBool)pInfo->isBigEndian, pInfo->charsetFamily, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode); if(U_FAILURE(errorCode)) { fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n", pItem->name, u_errorName(errorCode)); exit(errorCode); } ds->printError=printError; ds->printErrorContext=stderr; const uint8_t *inBytes=pItem->data+itemHeaderLength; int32_t length=pItem->length-itemHeaderLength; ucnv_enumDependencies(ds, pItem->name, pInfo, inBytes, length, check, context, &errorCode); udata_closeSwapper(ds); break; } default: break; } if(U_FAILURE(errorCode)) { exit(errorCode); } } } U_NAMESPACE_END