import sys
from unicode_parse_common import *
# http://www.unicode.org/Public/5.1.0/ucd/Scripts.txt
# Maps a script name to the HB_Script_* identifier used for it.  Every
# identifier is simply 'HB_Script_' followed by the script name, so the table
# is derived from the list of names instead of spelling out each pair.
script_to_harfbuzz = dict(
    (script_name, 'HB_Script_' + script_name)
    for script_name in (
        # This is the list of HB_Script_* at the time of writing
        'Common',
        'Greek',
        'Cyrillic',
        'Armenian',
        'Hebrew',
        'Arabic',
        'Syriac',
        'Thaana',
        'Devanagari',
        'Bengali',
        'Gurmukhi',
        'Gujarati',
        'Oriya',
        'Tamil',
        'Telugu',
        'Kannada',
        'Malayalam',
        'Sinhala',
        'Thai',
        'Lao',
        'Tibetan',
        'Myanmar',
        'Georgian',
        'Hangul',
        'Ogham',
        'Runic',
        'Khmer',
        'Inherited',
    )
)
class ScriptDict(object):
    """Subscript-only view over a mapping with a fixed fallback value.

    Indexing never raises for an unknown key: any key that `base` does not
    contain, or that `base` maps to None, yields 'HB_Script_Common'.
    `base` only needs to provide a dict-style get() method.
    """

    def __init__(self, base):
        """Remember `base`, the mapping that lookups are delegated to."""
        self.base = base

    def __getitem__(self, key):
        """Return base's value for `key`, or 'HB_Script_Common' if absent."""
        found = self.base.get(key)
        # Compare against None explicitly so that falsy but real values
        # stored in `base` are still returned unchanged.
        return 'HB_Script_Common' if found is None else found
def main(infile, outfile):
    """Write a C header holding the script range table to `outfile`.

    `infile` is handed to unicode_file_parse() together with a ScriptDict
    over script_to_harfbuzz and the default value 'HB_Script_Common'; the
    result is then passed through sort_and_merge().  Every resulting
    (start, end, value) triple becomes one initializer row of the generated
    `script_properties[]` array, and the number of rows is written out as
    `script_properties_count`.

    `outfile` must provide write(); None means standard output.
    """
    lookup = ScriptDict(script_to_harfbuzz)
    table = sort_and_merge(
        unicode_file_parse(infile, lookup, 'HB_Script_Common'))

    # A None destination falls back to standard output.
    sink = sys.stdout if outfile is None else outfile

    def emit(text):
        # One output line per call; the terminating newline is added here,
        # so a text that itself ends in '\n' is followed by a blank line.
        sink.write(text + '\n')

    preamble = (
        '// Generated from Unicode script tables\n',
        '#ifndef SCRIPT_PROPERTIES_H_',
        '#define SCRIPT_PROPERTIES_H_\n',
        '#include <stdint.h>',
        '#include "harfbuzz-shaper.h"\n',
        'struct script_property {',
        ' uint32_t range_start;',
        ' uint32_t range_end;',
        ' HB_Script script;',
        '};\n',
        'static const struct script_property script_properties[] = {',
    )
    for text in preamble:
        emit(text)

    # One array initializer per merged range.
    for (first, last, script) in table:
        emit(' {0x%x, 0x%x, %s},' % (first, last, script))

    emit('};\n')
    emit('static const unsigned script_properties_count = %d;\n' % len(table))
    emit('#endif // SCRIPT_PROPERTIES_H_')
# Command-line entry point: <script> <input .txt> <output .h>
if __name__ == '__main__':
    if len(sys.argv) != 3:
        print('Usage: %s <input .txt> <output .h>' % sys.argv[0])
    else:
        # Open via context managers so both files are closed, and the
        # generated header fully flushed to disk, even when main() raises
        # part-way through.  The input is opened first, so a missing input
        # file fails before the output file is created or truncated.
        with open(sys.argv[1], 'r') as infile:
            with open(sys.argv[2], 'w+') as outfile:
                main(infile, outfile)