/*
**********************************************************************
* Copyright (C) 2002-2010, International Business Machines
* Corporation and others.  All Rights Reserved.
**********************************************************************
* Date        Name        Description
* 10/11/02    aliu        Creation.
**********************************************************************
*/

#include "unicode/utypes.h"
#include "unicode/putil.h"
#include "unicode/uclean.h"
#include "cmemory.h"
#include "cstring.h"
#include "filestrm.h"
#include "uarrsort.h"
#include "unewdata.h"
#include "uoptions.h"
#include "uprops.h"
#include "propname.h"
#include "uassert.h"

#include <stdio.h>

U_NAMESPACE_USE

// TODO: Clean up and comment this code.

//----------------------------------------------------------------------
// BEGIN DATA
//
// This is the raw data to be output.  We define the data structure,
// then include a machine-generated header that contains the actual
// data.

#include "unicode/uchar.h"
#include "unicode/uscript.h"
#include "unicode/unorm.h"
#include "unicode/unorm2.h"

/**
 * One entry of the string table: a name string plus an integer index.
 * Two AliasName objects are equal when uprv_comparePropertyNames()
 * reports their strings as equal.
 */
class AliasName {
public:
    const char* str;
    int32_t     index;

    AliasName(const char* str, int32_t index);

    /** Three-way comparison of the two name strings. */
    int compare(const AliasName& other) const;

    UBool operator==(const AliasName& other) const {
        return compare(other) == 0;
    }

    UBool operator!=(const AliasName& other) const {
        return compare(other) != 0;
    }
};

AliasName::AliasName(const char* _str, int32_t _index) :
    str(_str),
    index(_index)
{
}

int AliasName::compare(const AliasName& other) const {
    return uprv_comparePropertyNames(str, other.str);
}

/**
 * An enum value together with the index of its first entry in the
 * NAME_GROUP array.
 */
class Alias {
public:
    int32_t enumValue;
    int32_t nameGroupIndex;

    Alias(int32_t enumValue, int32_t nameGroupIndex);

    int32_t getUniqueNames(int32_t* nameGroupIndices) const;
};

Alias::Alias(int32_t anEnumValue, int32_t aNameGroupIndex) :
    enumValue(anEnumValue),
    nameGroupIndex(aNameGroupIndex)
{
}

/**
 * An Alias that additionally carries a list of value aliases
 * (valueCount entries starting at valueList).
 */
class Property : public Alias {
public:
    int32_t      valueCount;
    const Alias* valueList;

    Property(int32_t enumValue, int32_t nameGroupIndex,
             int32_t valueCount, const Alias* valueList);
};

Property::Property(int32_t _enumValue, int32_t _nameGroupIndex,
                   int32_t _valueCount, const Alias* _valueList) :
    Alias(_enumValue, _nameGroupIndex),
    valueCount(_valueCount),
    valueList(_valueList)
{
}

// *** Include the data header ***
#include "data.h"

/* return a list of unique names, not including "", for this property
 * @param stringIndices array of at least MAX_NAMES_PER_GROUP
 * elements, will be filled with indices into STRING_TABLE
 * @return number of indices written to stringIndices
 */
int32_t Alias::getUniqueNames(int32_t* stringIndices) const {
    int32_t count = 0;
    int32_t i = nameGroupIndex;
    UBool done = FALSE;
    while (!done) {
        int32_t j = NAME_GROUP[i++];
        if (j < 0) {
            // a negative entry marks the last name of the group
            done = TRUE;
            j = -j;
        }
        if (j == 0) continue; // omit "" entries
        UBool dupe = FALSE;
        for (int32_t k=0; k<count; ++k) {
            if (stringIndices[k] == j) {
                dupe = TRUE;
                break;
            }
            // also do a string check for things like "age|Age"
            if (STRING_TABLE[stringIndices[k]] == STRING_TABLE[j]) {
                //printf("Found dupe %s|%s\n",
                //       STRING_TABLE[stringIndices[k]].str,
                //       STRING_TABLE[j].str);
                dupe = TRUE;
                break;
            }
        }
        if (dupe) continue; // omit duplicates
        // The caller's buffer only holds MAX_NAMES_PER_GROUP entries.
        U_ASSERT(count < MAX_NAMES_PER_GROUP);
        stringIndices[count++] = j;
    }
    return count;
}

// END DATA
//----------------------------------------------------------------------

/** Print msg to stderr, prefixed with "Error: ", and exit with status 1. */
void die(const char* msg) {
    fprintf(stderr, "Error: %s\n", msg);
    exit(1);
}

/**
 * uprv_malloc() wrapper that terminates the tool instead of returning
 * NULL for a non-empty request, so MALLOC() callers never dereference
 * a null pointer.
 */
static void* checkedMalloc(size_t size) {
    void* p = uprv_malloc(size);
    if (p == NULL && size > 0) {
        die("Out of memory");
    }
    return p;
}

// Allocate an uninitialized array of `count` objects of `type`.
// `count` is parenthesized so that expression arguments are safe.
#define MALLOC(type, count) \
    ((type*) checkedMalloc(sizeof(type) * (count)))

//----------------------------------------------------------------------

/**
 * A list of Alias objects.
 */
class AliasList {
public:
    virtual ~AliasList();
    virtual const Alias& operator[](int32_t i) const = 0;
    virtual int32_t count() const = 0;
};

AliasList::~AliasList() {}

/**
 * A single array.
*/ class AliasArrayList : public AliasList { const Alias* a; int32_t n; public: AliasArrayList(const Alias* _a, int32_t _n) { a = _a; n = _n; } virtual const Alias& operator[](int32_t i) const { return a[i]; } virtual int32_t count() const { return n; } }; /** * A single array. */ class PropertyArrayList : public AliasList { const Property* a; int32_t n; public: PropertyArrayList(const Property* _a, int32_t _n) { a = _a; n = _n; } virtual const Alias& operator[](int32_t i) const { return a[i]; } virtual int32_t count() const { return n; } }; //---------------------------------------------------------------------- /** * An element in a name index. It maps a name (given by index) into * an enum value. */ class NameToEnumEntry { public: int32_t nameIndex; int32_t enumValue; NameToEnumEntry(int32_t a, int32_t b) { nameIndex=a; enumValue=b; } }; // Sort function for NameToEnumEntry (sort by name) U_CFUNC int32_t compareNameToEnumEntry(const void * /*context*/, const void* e1, const void* e2) { return STRING_TABLE[((NameToEnumEntry*)e1)->nameIndex]. compare(STRING_TABLE[((NameToEnumEntry*)e2)->nameIndex]); } //---------------------------------------------------------------------- /** * An element in an enum index. It maps an enum into a name group entry * (given by index). */ class EnumToNameGroupEntry { public: int32_t enumValue; int32_t nameGroupIndex; EnumToNameGroupEntry(int32_t a, int32_t b) { enumValue=a; nameGroupIndex=b; } // are enumValues contiguous for count entries starting with this one? 
// ***!!!*** we assume we are in an array and look at neighbors ***!!!*** UBool isContiguous(int32_t count) const { const EnumToNameGroupEntry* p = this; for (int32_t i=1; i<count; ++i) { if (p[i].enumValue != (this->enumValue + i)) { return FALSE; } } return TRUE; } }; // Sort function for EnumToNameGroupEntry (sort by name index) U_CFUNC int32_t compareEnumToNameGroupEntry(const void * /*context*/, const void* e1, const void* e2) { return ((EnumToNameGroupEntry*)e1)->enumValue - ((EnumToNameGroupEntry*)e2)->enumValue; } //---------------------------------------------------------------------- /** * An element in the map from enumerated property enums to value maps. */ class EnumToValueEntry { public: int32_t enumValue; EnumToNameGroupEntry* enumToName; int32_t enumToName_count; NameToEnumEntry* nameToEnum; int32_t nameToEnum_count; // are enumValues contiguous for count entries starting with this one? // ***!!!*** we assume we are in an array and look at neighbors ***!!!*** UBool isContiguous(int32_t count) const { const EnumToValueEntry* p = this; for (int32_t i=1; i<count; ++i) { if (p[i].enumValue != (this->enumValue + i)) { return FALSE; } } return TRUE; } }; // Sort function for EnumToValueEntry (sort by enum) U_CFUNC int32_t compareEnumToValueEntry(const void * /*context*/, const void* e1, const void* e2) { return ((EnumToValueEntry*)e1)->enumValue - ((EnumToValueEntry*)e2)->enumValue; } //---------------------------------------------------------------------- // BEGIN Builder #define IS_VALID_OFFSET(x) (((x)>=0)&&((x)<=MAX_OFFSET)) class Builder { // header: PropertyAliases header; // 0: NonContiguousEnumToOffset* enumToName; int32_t enumToName_size; Offset enumToName_offset; // 1: (deleted) // 2: NameToEnum* nameToEnum; int32_t nameToEnum_size; Offset nameToEnum_offset; // 3: NonContiguousEnumToOffset* enumToValue; int32_t enumToValue_size; Offset enumToValue_offset; // 4: ValueMap* valueMap; int32_t valueMap_size; int32_t valueMap_count; Offset 
valueMap_offset; // for any i, one of valueEnumToName[i] or valueNCEnumToName[i] is // NULL and one is not. valueEnumToName_size[i] is the size of // the non-NULL one. i=0..valueMapCount-1 // 5a: EnumToOffset** valueEnumToName; // 5b: NonContiguousEnumToOffset** valueNCEnumToName; int32_t* valueEnumToName_size; Offset* valueEnumToName_offset; // 6: // arrays of valueMap_count pointers, sizes, & offsets NameToEnum** valueNameToEnum; int32_t* valueNameToEnum_size; Offset* valueNameToEnum_offset; // 98: Offset* nameGroupPool; int32_t nameGroupPool_count; int32_t nameGroupPool_size; Offset nameGroupPool_offset; // 99: char* stringPool; int32_t stringPool_count; int32_t stringPool_size; Offset stringPool_offset; Offset* stringPool_offsetArray; // relative to stringPool int32_t total_size; // size of everything int32_t debug; public: Builder(int32_t debugLevel); ~Builder(); void buildTopLevelProperties(const NameToEnumEntry* propName, int32_t propNameCount, const EnumToNameGroupEntry* propEnum, int32_t propEnumCount); void buildValues(const EnumToValueEntry* e2v, int32_t count); void buildStringPool(const AliasName* propertyNames, int32_t propertyNameCount, const int32_t* nameGroupIndices, int32_t nameGroupIndicesCount); void fixup(); int8_t* createData(int32_t& length) const; private: static EnumToOffset* buildEnumToOffset(const EnumToNameGroupEntry* e2ng, int32_t count, int32_t& size); static NonContiguousEnumToOffset* buildNCEnumToNameGroup(const EnumToNameGroupEntry* e2ng, int32_t count, int32_t& size); static NonContiguousEnumToOffset* buildNCEnumToValue(const EnumToValueEntry* e2v, int32_t count, int32_t& size); static NameToEnum* buildNameToEnum(const NameToEnumEntry* nameToEnum, int32_t count, int32_t& size); Offset stringIndexToOffset(int32_t index, UBool allowNeg=FALSE) const; void fixupNameToEnum(NameToEnum* n); void fixupEnumToNameGroup(EnumToOffset* e2ng); void fixupNCEnumToNameGroup(NonContiguousEnumToOffset* e2ng); void computeOffsets(); void 
fixupStringPoolOffsets(); void fixupNameGroupPoolOffsets(); void fixupMiscellaneousOffsets(); static int32_t align(int32_t a); static void erase(void* p, int32_t size); }; Builder::Builder(int32_t debugLevel) { debug = debugLevel; enumToName = 0; nameToEnum = 0; enumToValue = 0; valueMap_count = 0; valueMap = 0; valueEnumToName = 0; valueNCEnumToName = 0; valueEnumToName_size = 0; valueEnumToName_offset = 0; valueNameToEnum = 0; valueNameToEnum_size = 0; valueNameToEnum_offset = 0; nameGroupPool = 0; stringPool = 0; stringPool_offsetArray = 0; } Builder::~Builder() { uprv_free(enumToName); uprv_free(nameToEnum); uprv_free(enumToValue); uprv_free(valueMap); for (int32_t i=0; i<valueMap_count; ++i) { uprv_free(valueEnumToName[i]); uprv_free(valueNCEnumToName[i]); uprv_free(valueNameToEnum[i]); } uprv_free(valueEnumToName); uprv_free(valueNCEnumToName); uprv_free(valueEnumToName_size); uprv_free(valueEnumToName_offset); uprv_free(valueNameToEnum); uprv_free(valueNameToEnum_size); uprv_free(valueNameToEnum_offset); uprv_free(nameGroupPool); uprv_free(stringPool); uprv_free(stringPool_offsetArray); } int32_t Builder::align(int32_t a) { U_ASSERT(a >= 0); int32_t k = a % sizeof(int32_t); if (k == 0) { return a; } a += sizeof(int32_t) - k; return a; } void Builder::erase(void* p, int32_t size) { U_ASSERT(size >= 0); int8_t* q = (int8_t*) p; while (size--) { *q++ = 0; } } EnumToOffset* Builder::buildEnumToOffset(const EnumToNameGroupEntry* e2ng, int32_t count, int32_t& size) { U_ASSERT(e2ng->isContiguous(count)); size = align(EnumToOffset::getSize(count)); EnumToOffset* result = (EnumToOffset*) uprv_malloc(size); erase(result, size); result->enumStart = e2ng->enumValue; result->enumLimit = e2ng->enumValue + count; Offset* p = result->getOffsetArray(); for (int32_t i=0; i<count; ++i) { // set these to NGI index values // fix them up to NGI offset values U_ASSERT(IS_VALID_OFFSET(e2ng[i].nameGroupIndex)); p[i] = (Offset) e2ng[i].nameGroupIndex; // FIXUP later } return result; 
} NonContiguousEnumToOffset* Builder::buildNCEnumToNameGroup(const EnumToNameGroupEntry* e2ng, int32_t count, int32_t& size) { U_ASSERT(!e2ng->isContiguous(count)); size = align(NonContiguousEnumToOffset::getSize(count)); NonContiguousEnumToOffset* nc = (NonContiguousEnumToOffset*) uprv_malloc(size); erase(nc, size); nc->count = count; EnumValue* e = nc->getEnumArray(); Offset* p = nc->getOffsetArray(); for (int32_t i=0; i<count; ++i) { // set these to NGI index values // fix them up to NGI offset values e[i] = e2ng[i].enumValue; U_ASSERT(IS_VALID_OFFSET(e2ng[i].nameGroupIndex)); p[i] = (Offset) e2ng[i].nameGroupIndex; // FIXUP later } return nc; } NonContiguousEnumToOffset* Builder::buildNCEnumToValue(const EnumToValueEntry* e2v, int32_t count, int32_t& size) { U_ASSERT(!e2v->isContiguous(count)); size = align(NonContiguousEnumToOffset::getSize(count)); NonContiguousEnumToOffset* result = (NonContiguousEnumToOffset*) uprv_malloc(size); erase(result, size); result->count = count; EnumValue* e = result->getEnumArray(); for (int32_t i=0; i<count; ++i) { e[i] = e2v[i].enumValue; // offset must be set later } return result; } /** * Given an index into the string pool, return an offset. computeOffsets() * must have been called already. If allowNegative is true, allow negatives * and preserve their sign. 
*/ Offset Builder::stringIndexToOffset(int32_t index, UBool allowNegative) const { // Index 0 is ""; we turn this into an Offset of zero if (index == 0) return 0; if (index < 0) { if (allowNegative) { return -Builder::stringIndexToOffset(-index); } else { die("Negative string pool index"); } } else { if (index >= stringPool_count) { die("String pool index too large"); } Offset result = stringPool_offset + stringPool_offsetArray[index]; U_ASSERT(result >= 0 && result < total_size); return result; } return 0; // never executed; make compiler happy } NameToEnum* Builder::buildNameToEnum(const NameToEnumEntry* nameToEnum, int32_t count, int32_t& size) { size = align(NameToEnum::getSize(count)); NameToEnum* n2e = (NameToEnum*) uprv_malloc(size); erase(n2e, size); n2e->count = count; Offset* p = n2e->getNameArray(); EnumValue* e = n2e->getEnumArray(); for (int32_t i=0; i<count; ++i) { // set these to SP index values // fix them up to SP offset values U_ASSERT(IS_VALID_OFFSET(nameToEnum[i].nameIndex)); p[i] = (Offset) nameToEnum[i].nameIndex; // FIXUP later e[i] = nameToEnum[i].enumValue; } return n2e; } void Builder::buildTopLevelProperties(const NameToEnumEntry* propName, int32_t propNameCount, const EnumToNameGroupEntry* propEnum, int32_t propEnumCount) { enumToName = buildNCEnumToNameGroup(propEnum, propEnumCount, enumToName_size); nameToEnum = buildNameToEnum(propName, propNameCount, nameToEnum_size); } void Builder::buildValues(const EnumToValueEntry* e2v, int32_t count) { int32_t i; U_ASSERT(!e2v->isContiguous(count)); valueMap_count = count; enumToValue = buildNCEnumToValue(e2v, count, enumToValue_size); valueMap_size = align(count * sizeof(ValueMap)); valueMap = (ValueMap*) uprv_malloc(valueMap_size); erase(valueMap, valueMap_size); valueEnumToName = MALLOC(EnumToOffset*, count); valueNCEnumToName = MALLOC(NonContiguousEnumToOffset*, count); valueEnumToName_size = MALLOC(int32_t, count); valueEnumToName_offset = MALLOC(Offset, count); valueNameToEnum = 
MALLOC(NameToEnum*, count); valueNameToEnum_size = MALLOC(int32_t, count); valueNameToEnum_offset = MALLOC(Offset, count); for (i=0; i<count; ++i) { UBool isContiguous = e2v[i].enumToName->isContiguous(e2v[i].enumToName_count); valueEnumToName[i] = 0; valueNCEnumToName[i] = 0; if (isContiguous) { valueEnumToName[i] = buildEnumToOffset(e2v[i].enumToName, e2v[i].enumToName_count, valueEnumToName_size[i]); } else { valueNCEnumToName[i] = buildNCEnumToNameGroup(e2v[i].enumToName, e2v[i].enumToName_count, valueEnumToName_size[i]); } valueNameToEnum[i] = buildNameToEnum(e2v[i].nameToEnum, e2v[i].nameToEnum_count, valueNameToEnum_size[i]); } } void Builder::buildStringPool(const AliasName* propertyNames, int32_t propertyNameCount, const int32_t* nameGroupIndices, int32_t nameGroupIndicesCount) { int32_t i; nameGroupPool_count = nameGroupIndicesCount; nameGroupPool_size = sizeof(Offset) * nameGroupPool_count; nameGroupPool = MALLOC(Offset, nameGroupPool_count); for (i=0; i<nameGroupPool_count; ++i) { // Some indices are negative. 
int32_t a = nameGroupIndices[i]; if (a < 0) a = -a; U_ASSERT(IS_VALID_OFFSET(a)); nameGroupPool[i] = (Offset) nameGroupIndices[i]; } stringPool_count = propertyNameCount; stringPool_size = 0; // first string must be "" -- we skip it U_ASSERT(*propertyNames[0].str == 0); for (i=1 /*sic*/; i<propertyNameCount; ++i) { stringPool_size += (int32_t)(uprv_strlen(propertyNames[i].str) + 1); } stringPool = MALLOC(char, stringPool_size); stringPool_offsetArray = MALLOC(Offset, stringPool_count); Offset soFar = 0; char* p = stringPool; stringPool_offsetArray[0] = -1; // we don't use this entry for (i=1 /*sic*/; i<propertyNameCount; ++i) { const char* str = propertyNames[i].str; int32_t len = (int32_t)uprv_strlen(str); uprv_strcpy(p, str); p += len; *p++ = 0; stringPool_offsetArray[i] = soFar; soFar += (Offset)(len+1); } U_ASSERT(soFar == stringPool_size); U_ASSERT(p == (stringPool + stringPool_size)); } // Confirm that PropertyAliases is a POD (plain old data; see C++ // std). The following union will _fail to compile_ if // PropertyAliases is _not_ a POD. (Note: We used to use the offsetof // macro to check this, but that's not quite right, so that test is // commented out -- see below.) typedef union { int32_t i; PropertyAliases p; } PropertyAliasesPODTest; void Builder::computeOffsets() { int32_t i; Offset off = sizeof(header); if (debug>0) { printf("header \t offset=%4d size=%5d\n", 0, off); } // PropertyAliases must have no v-table and must be // padded (if necessary) to the next 32-bit boundary. 
//U_ASSERT(offsetof(PropertyAliases, enumToName_offset) == 0); // see above U_ASSERT(sizeof(header) % sizeof(int32_t) == 0); #define COMPUTE_OFFSET(foo) COMPUTE_OFFSET2(foo,int32_t) #define COMPUTE_OFFSET2(foo,type) \ if (debug>0)\ printf(#foo "\t offset=%4d size=%5d\n", off, (int)foo##_size);\ foo##_offset = off;\ U_ASSERT(IS_VALID_OFFSET(off + foo##_size));\ U_ASSERT(foo##_offset % sizeof(type) == 0);\ off = (Offset) (off + foo##_size); COMPUTE_OFFSET(enumToName); // 0: COMPUTE_OFFSET(nameToEnum); // 2: COMPUTE_OFFSET(enumToValue); // 3: COMPUTE_OFFSET(valueMap); // 4: for (i=0; i<valueMap_count; ++i) { if (debug>0) { printf(" enumToName[%d]\t offset=%4d size=%5d\n", (int)i, off, (int)valueEnumToName_size[i]); } valueEnumToName_offset[i] = off; // 5: U_ASSERT(IS_VALID_OFFSET(off + valueEnumToName_size[i])); off = (Offset) (off + valueEnumToName_size[i]); if (debug>0) { printf(" nameToEnum[%d]\t offset=%4d size=%5d\n", (int)i, off, (int)valueNameToEnum_size[i]); } valueNameToEnum_offset[i] = off; // 6: U_ASSERT(IS_VALID_OFFSET(off + valueNameToEnum_size[i])); off = (Offset) (off + valueNameToEnum_size[i]); } // These last two chunks have weaker alignment needs COMPUTE_OFFSET2(nameGroupPool,Offset); // 98: COMPUTE_OFFSET2(stringPool,char); // 99: total_size = off; if (debug>0) printf("total size=%5d\n\n", (int)total_size); U_ASSERT(total_size <= (MAX_OFFSET+1)); } void Builder::fixupNameToEnum(NameToEnum* n) { // Fix the string pool offsets in n Offset* p = n->getNameArray(); for (int32_t i=0; i<n->count; ++i) { p[i] = stringIndexToOffset(p[i]); } } void Builder::fixupStringPoolOffsets() { int32_t i; // 2: fixupNameToEnum(nameToEnum); // 6: for (i=0; i<valueMap_count; ++i) { fixupNameToEnum(valueNameToEnum[i]); } // 98: for (i=0; i<nameGroupPool_count; ++i) { nameGroupPool[i] = stringIndexToOffset(nameGroupPool[i], TRUE); } } void Builder::fixupEnumToNameGroup(EnumToOffset* e2ng) { EnumValue i; int32_t j; Offset* p = e2ng->getOffsetArray(); for (i=e2ng->enumStart, 
j=0; i<e2ng->enumLimit; ++i, ++j) { p[j] = nameGroupPool_offset + sizeof(Offset) * p[j]; } } void Builder::fixupNCEnumToNameGroup(NonContiguousEnumToOffset* e2ng) { int32_t i; /*EnumValue* e = e2ng->getEnumArray();*/ Offset* p = e2ng->getOffsetArray(); for (i=0; i<e2ng->count; ++i) { p[i] = nameGroupPool_offset + sizeof(Offset) * p[i]; } } void Builder::fixupNameGroupPoolOffsets() { int32_t i; // 0: fixupNCEnumToNameGroup(enumToName); // 1: (deleted) // 5: for (i=0; i<valueMap_count; ++i) { // 5a: if (valueEnumToName[i] != 0) { fixupEnumToNameGroup(valueEnumToName[i]); } // 5b: if (valueNCEnumToName[i] != 0) { fixupNCEnumToNameGroup(valueNCEnumToName[i]); } } } void Builder::fixupMiscellaneousOffsets() { int32_t i; // header: erase(&header, sizeof(header)); header.enumToName_offset = enumToName_offset; header.nameToEnum_offset = nameToEnum_offset; header.enumToValue_offset = enumToValue_offset; // header meta-info used by Java: U_ASSERT(total_size > 0 && total_size < 0x7FFF); header.total_size = (int16_t) total_size; header.valueMap_offset = valueMap_offset; header.valueMap_count = (int16_t) valueMap_count; header.nameGroupPool_offset = nameGroupPool_offset; header.nameGroupPool_count = (int16_t) nameGroupPool_count; header.stringPool_offset = stringPool_offset; header.stringPool_count = (int16_t) stringPool_count - 1; // don't include "" entry U_ASSERT(valueMap_count <= 0x7FFF); U_ASSERT(nameGroupPool_count <= 0x7FFF); U_ASSERT(stringPool_count <= 0x7FFF); // 3: Offset* p = enumToValue->getOffsetArray(); /*EnumValue* e = enumToValue->getEnumArray();*/ U_ASSERT(valueMap_count == enumToValue->count); for (i=0; i<valueMap_count; ++i) { p[i] = (Offset)(valueMap_offset + sizeof(ValueMap) * i); } // 4: for (i=0; i<valueMap_count; ++i) { ValueMap& v = valueMap[i]; v.enumToName_offset = v.ncEnumToName_offset = 0; if (valueEnumToName[i] != 0) { v.enumToName_offset = valueEnumToName_offset[i]; } if (valueNCEnumToName[i] != 0) { v.ncEnumToName_offset = 
valueEnumToName_offset[i]; } v.nameToEnum_offset = valueNameToEnum_offset[i]; } } void Builder::fixup() { computeOffsets(); fixupStringPoolOffsets(); fixupNameGroupPoolOffsets(); fixupMiscellaneousOffsets(); } int8_t* Builder::createData(int32_t& length) const { length = total_size; int8_t* result = MALLOC(int8_t, length); int8_t* p = result; int8_t* limit = result + length; #define APPEND2(x, size) \ U_ASSERT((p+size)<=limit); \ uprv_memcpy(p, x, size); \ p += size #define APPEND(x) APPEND2(x, x##_size) APPEND2(&header, sizeof(header)); APPEND(enumToName); APPEND(nameToEnum); APPEND(enumToValue); APPEND(valueMap); for (int32_t i=0; i<valueMap_count; ++i) { U_ASSERT((valueEnumToName[i] != 0 && valueNCEnumToName[i] == 0) || (valueEnumToName[i] == 0 && valueNCEnumToName[i] != 0)); if (valueEnumToName[i] != 0) { APPEND2(valueEnumToName[i], valueEnumToName_size[i]); } if (valueNCEnumToName[i] != 0) { APPEND2(valueNCEnumToName[i], valueEnumToName_size[i]); } APPEND2(valueNameToEnum[i], valueNameToEnum_size[i]); } APPEND(nameGroupPool); APPEND(stringPool); if (p != limit) { fprintf(stderr, "p != limit; p = %p, limit = %p", p, limit); exit(1); } return result; } // END Builder //---------------------------------------------------------------------- /* UDataInfo cf. 
udata.h */ static UDataInfo dataInfo = { sizeof(UDataInfo), 0, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, sizeof(UChar), 0, {PNAME_SIG_0, PNAME_SIG_1, PNAME_SIG_2, PNAME_SIG_3}, {PNAME_FORMAT_VERSION, 0, 0, 0}, /* formatVersion */ {VERSION_0, VERSION_1, VERSION_2, VERSION_3} /* Unicode version */ }; class genpname { // command-line options UBool useCopyright; UBool verbose; int32_t debug; public: int MMain(int argc, char *argv[]); private: NameToEnumEntry* createNameIndex(const AliasList& list, int32_t& nameIndexCount); EnumToNameGroupEntry* createEnumIndex(const AliasList& list); int32_t writeDataFile(const char *destdir, const Builder&); }; int main(int argc, char *argv[]) { UErrorCode status = U_ZERO_ERROR; u_init(&status); if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) { // Note: u_init() will try to open ICU property data. // failures here are expected when building ICU from scratch. // ignore them. fprintf(stderr, "genpname: can not initialize ICU. Status = %s\n", u_errorName(status)); exit(1); } genpname app; U_MAIN_INIT_ARGS(argc, argv); int retVal = app.MMain(argc, argv); u_cleanup(); return retVal; } static UOption options[]={ UOPTION_HELP_H, UOPTION_HELP_QUESTION_MARK, UOPTION_COPYRIGHT, UOPTION_DESTDIR, UOPTION_VERBOSE, UOPTION_DEF("debug", 'D', UOPT_REQUIRES_ARG), }; NameToEnumEntry* genpname::createNameIndex(const AliasList& list, int32_t& nameIndexCount) { // Build name => enum map // This is an n->1 map. There are typically multiple names // mapping to one enum. The name index is sorted in order of the name, // as defined by the uprv_compareAliasNames() function. 
int32_t i, j; int32_t count = list.count(); // compute upper limit on number of names in the index int32_t nameIndexCapacity = count * MAX_NAMES_PER_GROUP; NameToEnumEntry* nameIndex = MALLOC(NameToEnumEntry, nameIndexCapacity); nameIndexCount = 0; int32_t names[MAX_NAMES_PER_GROUP]; for (i=0; i<count; ++i) { const Alias& p = list[i]; int32_t n = p.getUniqueNames(names); for (j=0; j<n; ++j) { U_ASSERT(nameIndexCount < nameIndexCapacity); nameIndex[nameIndexCount++] = NameToEnumEntry(names[j], p.enumValue); } } /* * use a stable sort to ensure consistent results between * genpname.cpp and the propname.cpp swapping code */ UErrorCode errorCode = U_ZERO_ERROR; uprv_sortArray(nameIndex, nameIndexCount, sizeof(nameIndex[0]), compareNameToEnumEntry, NULL, TRUE, &errorCode); if (debug>1) { printf("Alias names: %d\n", (int)nameIndexCount); for (i=0; i<nameIndexCount; ++i) { printf("%s => %d\n", STRING_TABLE[nameIndex[i].nameIndex].str, (int)nameIndex[i].enumValue); } printf("\n"); } // make sure there are no duplicates. for a sorted list we need // only compare adjacent items. Alias.getUniqueNames() has // already eliminated duplicate names for a single property, which // does occur, so we're checking for duplicate names between two // properties, which should never occur. UBool ok = TRUE; for (i=1; i<nameIndexCount; ++i) { if (STRING_TABLE[nameIndex[i-1].nameIndex] == STRING_TABLE[nameIndex[i].nameIndex]) { printf("Error: Duplicate names in property list: \"%s\", \"%s\"\n", STRING_TABLE[nameIndex[i-1].nameIndex].str, STRING_TABLE[nameIndex[i].nameIndex].str); ok = FALSE; } } if (!ok) { die("Two or more duplicate names in property list"); } return nameIndex; } EnumToNameGroupEntry* genpname::createEnumIndex(const AliasList& list) { // Build the enum => name map // This is a 1->n map. Each enum maps to 1 or more names. To // accomplish this the index entry points to an element of the // NAME_GROUP array. This is the short name (which may be empty). 
// From there, subsequent elements of NAME_GROUP are alternate // names for this enum, up to and including the first one that is // negative (negate for actual index). int32_t i, j, k; int32_t count = list.count(); EnumToNameGroupEntry* enumIndex = MALLOC(EnumToNameGroupEntry, count); for (i=0; i<count; ++i) { const Alias& p = list[i]; enumIndex[i] = EnumToNameGroupEntry(p.enumValue, p.nameGroupIndex); } UErrorCode errorCode = U_ZERO_ERROR; uprv_sortArray(enumIndex, count, sizeof(enumIndex[0]), compareEnumToNameGroupEntry, NULL, FALSE, &errorCode); if (debug>1) { printf("Property enums: %d\n", (int)count); for (i=0; i<count; ++i) { printf("%d => %d: ", (int)enumIndex[i].enumValue, (int)enumIndex[i].nameGroupIndex); UBool done = FALSE; for (j=enumIndex[i].nameGroupIndex; !done; ++j) { k = NAME_GROUP[j]; if (k < 0) { k = -k; done = TRUE; } printf("\"%s\"", STRING_TABLE[k].str); if (!done) printf(", "); } printf("\n"); } printf("\n"); } return enumIndex; } int genpname::MMain(int argc, char* argv[]) { int32_t i, j; UErrorCode status = U_ZERO_ERROR; u_init(&status); if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) { fprintf(stderr, "Error: u_init returned %s\n", u_errorName(status)); status = U_ZERO_ERROR; } /* preset then read command line options */ options[3].value=u_getDataDirectory(); argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options); /* error handling, printing usage message */ if (argc<0) { fprintf(stderr, "error in command line argument \"%s\"\n", argv[-argc]); } debug = options[5].doesOccur ? (*options[5].value - '0') : 0; if (argc!=1 || options[0].doesOccur || options[1].doesOccur || debug < 0 || debug > 9) { fprintf(stderr, "usage: %s [-options]\n" "\tcreate " PNAME_DATA_NAME "." PNAME_DATA_TYPE "\n" "options:\n" "\t-h or -? 
or --help this usage text\n" "\t-v or --verbose turn on verbose output\n" "\t-c or --copyright include a copyright notice\n" "\t-d or --destdir destination directory, followed by the path\n" "\t-D or --debug 0..9 emit debugging messages (if > 0)\n", argv[0]); return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; } /* get the options values */ useCopyright=options[2].doesOccur; verbose = options[4].doesOccur; // ------------------------------------------------------------ // Do not sort the string table, instead keep it in data.h order. // This simplifies data swapping and testing thereof because the string // table itself need not be sorted during swapping. // The NameToEnum sorter sorts each such map's string offsets instead. if (debug>1) { printf("String pool: %d\n", (int)STRING_COUNT); for (i=0; i<STRING_COUNT; ++i) { if (i != 0) { printf(", "); } printf("%s (%d)", STRING_TABLE[i].str, (int)STRING_TABLE[i].index); } printf("\n\n"); } // ------------------------------------------------------------ // Create top-level property indices PropertyArrayList props(PROPERTY, PROPERTY_COUNT); int32_t propNameCount; NameToEnumEntry* propName = createNameIndex(props, propNameCount); EnumToNameGroupEntry* propEnum = createEnumIndex(props); // ------------------------------------------------------------ // Create indices for the value list for each enumerated property // This will have more entries than we need... 
EnumToValueEntry* enumToValue = MALLOC(EnumToValueEntry, PROPERTY_COUNT); int32_t enumToValue_count = 0; for (i=0, j=0; i<PROPERTY_COUNT; ++i) { if (PROPERTY[i].valueCount == 0) continue; AliasArrayList values(PROPERTY[i].valueList, PROPERTY[i].valueCount); enumToValue[j].enumValue = PROPERTY[i].enumValue; enumToValue[j].enumToName = createEnumIndex(values); enumToValue[j].enumToName_count = PROPERTY[i].valueCount; enumToValue[j].nameToEnum = createNameIndex(values, enumToValue[j].nameToEnum_count); ++j; } enumToValue_count = j; uprv_sortArray(enumToValue, enumToValue_count, sizeof(enumToValue[0]), compareEnumToValueEntry, NULL, FALSE, &status); // ------------------------------------------------------------ // Build PropertyAliases layout in memory Builder builder(debug); builder.buildTopLevelProperties(propName, propNameCount, propEnum, PROPERTY_COUNT); builder.buildValues(enumToValue, enumToValue_count); builder.buildStringPool(STRING_TABLE, STRING_COUNT, NAME_GROUP, NAME_GROUP_COUNT); builder.fixup(); //////////////////////////////////////////////////////////// // Write the output file //////////////////////////////////////////////////////////// int32_t wlen = writeDataFile(options[3].value, builder); if (verbose) { fprintf(stdout, "Output file: %s.%s, %ld bytes\n", U_ICUDATA_NAME "_" PNAME_DATA_NAME, PNAME_DATA_TYPE, (long)wlen); } return 0; // success } int32_t genpname::writeDataFile(const char *destdir, const Builder& builder) { int32_t length; int8_t* data = builder.createData(length); UNewDataMemory *pdata; UErrorCode status = U_ZERO_ERROR; pdata = udata_create(destdir, PNAME_DATA_TYPE, PNAME_DATA_NAME, &dataInfo, useCopyright ? 
U_COPYRIGHT_STRING : 0, &status); if (U_FAILURE(status)) { die("Unable to create data memory"); } udata_writeBlock(pdata, data, length); int32_t dataLength = (int32_t) udata_finish(pdata, &status); if (U_FAILURE(status)) { die("Error writing output file"); } if (dataLength != length) { die("Written file doesn't match expected size"); } return dataLength; } //eof