/*
*******************************************************************************
* Copyright (C) 2013, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* file name:  uscript_props.cpp
* encoding:   US-ASCII
* tab size:   8 (not used)
* indentation:4
*
* created on: 2013feb16
* created by: Markus W. Scherer
*/

#include "unicode/utypes.h"
#include "unicode/unistr.h"
#include "unicode/uscript.h"
#include "unicode/utf16.h"
#include "ustr_imp.h"

#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))

namespace {

// Script metadata (script properties).
// See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt
//
// Each script is described by one packed int32_t:
//   Bits 20.. 0: sample character (code point)
//   Bits 23..21: usage
//   Bits 31..24: single-bit flags
// An entry of 0 means NOT_ENCODED: no sample character, and every
// script property defaults to false.

// Field extraction helpers for the packed layout described above.
const int32_t SAMPLE_CHAR_MASK = 0x1fffff;
const int32_t USAGE_SHIFT = 21;
const int32_t USAGE_MASK = 7;

// Bits 23..21: usage
const int32_t UNKNOWN = 1 << 21;
const int32_t EXCLUSION = 2 << 21;
const int32_t LIMITED_USE = 3 << 21;
const int32_t ASPIRATIONAL = 4 << 21;
const int32_t RECOMMENDED = 5 << 21;

// Bits 31..24: Single-bit flags
const int32_t RTL = 1 << 24;
const int32_t LB_LETTERS = 1 << 25;
const int32_t CASED = 1 << 26;

// Indexed by UScriptCode value; the position of every entry (including the
// bare 0 placeholders) is significant and must not be changed by hand.
const int32_t SCRIPT_PROPS[] = {
    // Begin copy-paste output from
    // tools/trunk/unicode/py/parsescriptmetadata.py
    0x0040 | UNKNOWN,  // Zyyy
    0x0308 | UNKNOWN,  // Zinh
    0x0628 | RECOMMENDED | RTL,  // Arab
    0x0531 | RECOMMENDED | CASED,  // Armn
    0x0995 | RECOMMENDED,  // Beng
    0x3105 | RECOMMENDED | LB_LETTERS,  // Bopo
    0x13C4 | LIMITED_USE,  // Cher
    0x03E2 | EXCLUSION | CASED,  // Copt
    0x042F | RECOMMENDED | CASED,  // Cyrl
    0x10414 | EXCLUSION | CASED,  // Dsrt
    0x0905 | RECOMMENDED,  // Deva
    0x12A0 | RECOMMENDED,  // Ethi
    0x10D3 | RECOMMENDED,  // Geor
    0x10330 | EXCLUSION,  // Goth
    0x03A9 | RECOMMENDED | CASED,  // Grek
    0x0A95 | RECOMMENDED,  // Gujr
    0x0A15 | RECOMMENDED,  // Guru
    0x5B57 | RECOMMENDED | LB_LETTERS,  // Hani
    0xAC00 | RECOMMENDED,  // Hang
    0x05D0 | RECOMMENDED | RTL,  // Hebr
    0x304B | RECOMMENDED | LB_LETTERS,  // Hira
    0x0C95 | RECOMMENDED,  // Knda
    0x30AB | RECOMMENDED | LB_LETTERS,  // Kana
    0x1780 | RECOMMENDED | LB_LETTERS,  // Khmr
    0x0EA5 | RECOMMENDED | LB_LETTERS,  // Laoo
    0x004C | RECOMMENDED | CASED,  // Latn
    0x0D15 | RECOMMENDED,  // Mlym
    0x1826 | ASPIRATIONAL,  // Mong
    0x1000 | RECOMMENDED | LB_LETTERS,  // Mymr
    0x168F | EXCLUSION,  // Ogam
    0x10300 | EXCLUSION,  // Ital
    0x0B15 | RECOMMENDED,  // Orya
    0x16A0 | EXCLUSION,  // Runr
    0x0D85 | RECOMMENDED,  // Sinh
    0x0710 | LIMITED_USE | RTL,  // Syrc
    0x0B95 | RECOMMENDED,  // Taml
    0x0C15 | RECOMMENDED,  // Telu
    0x078C | RECOMMENDED | RTL,  // Thaa
    0x0E17 | RECOMMENDED | LB_LETTERS,  // Thai
    0x0F40 | RECOMMENDED,  // Tibt
    0x14C0 | ASPIRATIONAL,  // Cans
    0xA288 | ASPIRATIONAL | LB_LETTERS,  // Yiii
    0x1703 | EXCLUSION,  // Tglg
    0x1723 | EXCLUSION,  // Hano
    0x1743 | EXCLUSION,  // Buhd
    0x1763 | EXCLUSION,  // Tagb
    0x2800 | UNKNOWN,  // Brai
    0x10800 | EXCLUSION | RTL,  // Cprt
    0x1900 | LIMITED_USE,  // Limb
    0x10000 | EXCLUSION,  // Linb
    0x10480 | EXCLUSION,  // Osma
    0x10450 | EXCLUSION,  // Shaw
    0x1950 | LIMITED_USE | LB_LETTERS,  // Tale
    0x10380 | EXCLUSION,  // Ugar
    0,
    0x1A00 | EXCLUSION,  // Bugi
    0x2C00 | EXCLUSION | CASED,  // Glag
    0x10A00 | EXCLUSION | RTL,  // Khar
    0xA800 | LIMITED_USE,  // Sylo
    0x1980 | LIMITED_USE | LB_LETTERS,  // Talu
    0x2D30 | ASPIRATIONAL,  // Tfng
    0x103A0 | EXCLUSION,  // Xpeo
    0x1B05 | LIMITED_USE | LB_LETTERS,  // Bali
    0x1BC0 | LIMITED_USE,  // Batk
    0,
    0x11005 | EXCLUSION,  // Brah
    0xAA00 | LIMITED_USE,  // Cham
    0,
    0,
    0,
    0,
    0x13153 | EXCLUSION,  // Egyp
    0,
    0x5B57 | RECOMMENDED | LB_LETTERS,  // Hans
    0x5B57 | RECOMMENDED | LB_LETTERS,  // Hant
    0,
    0,
    0,
    0xA984 | LIMITED_USE | LB_LETTERS,  // Java
    0xA90A | LIMITED_USE,  // Kali
    0,
    0,
    0x1C00 | LIMITED_USE,  // Lepc
    0,
    0x0840 | LIMITED_USE | RTL,  // Mand
    0,
    0x10980 | EXCLUSION | RTL,  // Mero
    0x07CA | LIMITED_USE | RTL,  // Nkoo
    0x10C00 | EXCLUSION | RTL,  // Orkh
    0,
    0xA840 | EXCLUSION,  // Phag
    0x10900 | EXCLUSION | RTL,  // Phnx
    0x16F00 | ASPIRATIONAL,  // Plrd
    0,
    0,
    0,
    0,
    0,
    0,
    0xA549 | LIMITED_USE,  // Vaii
    0,
    0x12000 | EXCLUSION,  // Xsux
    0,
    0xFDD0 | UNKNOWN,  // Zzzz
    0x102A0 | EXCLUSION,  // Cari
    0x304B | RECOMMENDED | LB_LETTERS,  // Jpan
    0x1A20 | LIMITED_USE | LB_LETTERS,  // Lana
    0x10280 | EXCLUSION,  // Lyci
    0x10920 | EXCLUSION | RTL,  // Lydi
    0x1C5A | LIMITED_USE,  // Olck
    0xA930 | EXCLUSION,  // Rjng
    0xA882 | LIMITED_USE,  // Saur
    0,
    0x1B83 | LIMITED_USE,  // Sund
    0,
    0xABC0 | LIMITED_USE,  // Mtei
    0x10840 | EXCLUSION | RTL,  // Armi
    0x10B00 | EXCLUSION | RTL,  // Avst
    0x11103 | LIMITED_USE,  // Cakm
    0xAC00 | RECOMMENDED,  // Kore
    0x11083 | EXCLUSION,  // Kthi
    0,
    0x10B60 | EXCLUSION | RTL,  // Phli
    0,
    0,
    0x10B40 | EXCLUSION | RTL,  // Prti
    0x0800 | EXCLUSION | RTL,  // Samr
    0xAA80 | LIMITED_USE | LB_LETTERS,  // Tavt
    0,
    0,
    0xA6A0 | LIMITED_USE,  // Bamu
    0xA4D0 | LIMITED_USE,  // Lisu
    0,
    0x10A60 | EXCLUSION | RTL,  // Sarb
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0x109A0 | EXCLUSION | RTL,  // Merc
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0x11183 | EXCLUSION,  // Shrd
    0x110D0 | EXCLUSION,  // Sora
    0x11680 | EXCLUSION,  // Takr
    0,
    0,
    0,
    0,
    0,
    // End copy-paste from parsescriptmetadata.py
};

/**
 * Looks up the packed property word for a script code.
 * Any code that falls outside the SCRIPT_PROPS table yields 0, which is the
 * same value the table uses for "no data".
 */
int32_t getScriptProps(UScriptCode script) {
    if (script < 0 || script >= LENGTHOF(SCRIPT_PROPS)) {
        return 0;
    }
    return SCRIPT_PROPS[script];
}

/** Extracts the sample code point (bits 20..0) for a script; 0 if there is none. */
int32_t getSampleChar(UScriptCode script) {
    return getScriptProps(script) & SAMPLE_CHAR_MASK;
}

/** Tests whether one of the single-bit flags (RTL, LB_LETTERS, CASED) is set for a script. */
UBool hasFlag(UScriptCode script, int32_t flag) {
    return (getScriptProps(script) & flag) != 0;
}

}  // namespace

/**
 * Writes the script's sample character into dest as UTF-16.
 *
 * @param script     script code to look up
 * @param dest       output buffer; may be NULL only when capacity is 0
 * @param capacity   number of UChars available in dest; must not be negative
 * @param pErrorCode in/out error code; nothing is done if it already holds a
 *                   failure, and it is set to U_ILLEGAL_ARGUMENT_ERROR for a
 *                   bad dest/capacity combination
 * @return 0 on an incoming failure or bad arguments; otherwise whatever
 *         u_terminateUChars() returns for the sample's UTF-16 length
 *         (0 when the script has no sample character)
 */
U_CAPI int32_t U_EXPORT2
uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode) {
    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if(capacity < 0 || (dest == NULL && capacity > 0)) {
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    int32_t sampleChar = getSampleChar(script);
    int32_t length = 0;
    if(sampleChar != 0) {
        length = U16_LENGTH(sampleChar);
        // Only write when the whole code point fits; the length is still
        // handed to u_terminateUChars() together with capacity below.
        if(capacity >= length) {
            int32_t destIndex = 0;
            U16_APPEND_UNSAFE(dest, destIndex, sampleChar);
        }
    }
    return u_terminateUChars(dest, capacity, length, pErrorCode);
}

/**
 * Returns the script's sample character as a UnicodeString.
 * The string is empty when the script has no sample character.
 */
U_COMMON_API icu::UnicodeString U_EXPORT2
uscript_getSampleUnicodeString(UScriptCode script) {
    icu::UnicodeString sample;
    int32_t sampleChar = getSampleChar(script);
    if(sampleChar == 0) {
        return sample;
    }
    sample.append(sampleChar);
    return sample;
}

/** Returns the usage field (bits 23..21 of the script's properties) as a UScriptUsage. */
U_CAPI UScriptUsage U_EXPORT2
uscript_getUsage(UScriptCode script) {
    const int32_t usageBits = (getScriptProps(script) >> USAGE_SHIFT) & USAGE_MASK;
    return static_cast<UScriptUsage>(usageBits);
}

/** Returns whether the RTL flag is set for the script. */
U_CAPI UBool U_EXPORT2
uscript_isRightToLeft(UScriptCode script) {
    return hasFlag(script, RTL);
}

/** Returns whether the LB_LETTERS flag is set for the script. */
U_CAPI UBool U_EXPORT2
uscript_breaksBetweenLetters(UScriptCode script) {
    return hasFlag(script, LB_LETTERS);
}

/** Returns whether the CASED flag is set for the script. */
U_CAPI UBool U_EXPORT2
uscript_isCased(UScriptCode script) {
    return hasFlag(script, CASED);
}