unicodedata.c - Android社区 - https://www.androidos.net.cn/

/* ------------------------------------------------------------------------

unicodedata -- Provides access to the Unicode database.

Data was extracted from the UnicodeData.txt file.
   The current version number is reported in the unidata_version constant.

Written by Marc-Andre Lemburg (mal@lemburg.com).
   Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)
   Modified by Martin v. Löwis (martin@v.loewis.de)

------------------------------------------------------------------------ */

#define PY_SSIZE_T_CLEAN

#include "Python.h"
#include "ucnhash.h"
#include "structmember.h"

/*[clinic input]
module unicodedata
class unicodedata.UCD 'PreviousDBVersion *' '&UCD_Type'
[clinic start generated code]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=6dac153082d150bc]*/

/* character properties */

typedef struct {
    const unsigned char category;       /* index into
                                           _PyUnicode_CategoryNames */
    const unsigned char combining;      /* combining class value 0 - 255 */
    const unsigned char bidirectional;  /* index into
                                           _PyUnicode_BidirectionalNames */
    const unsigned char mirrored;       /* true if mirrored in bidir mode */
    const unsigned char east_asian_width;       /* index into
                                                   _PyUnicode_EastAsianWidth */
    const unsigned char normalization_quick_check; /* see is_normalized() */
} _PyUnicode_DatabaseRecord;

typedef struct change_record {
    /* sequence of fields should be the same as in merge_old_version */
    const unsigned char bidir_changed;
    const unsigned char category_changed;
    const unsigned char decimal_changed;
    const unsigned char mirrored_changed;
    const unsigned char east_asian_width_changed;
    const double numeric_changed;
} change_record;

/* data file generated by Tools/unicode/makeunicodedata.py */
#include "unicodedata_db.h"

static const _PyUnicode_DatabaseRecord*
_getrecord_ex(Py_UCS4 code)
{
    int index;
    if (code >= 0x110000)
        index = 0;
    else {
        index = index1[(code>>SHIFT)];
        index = index2[(index<<SHIFT)+(code&((1<<SHIFT)-1))];
    }

return &_PyUnicode_Database_Records[index];
}

/* ------------- Previous-version API ------------------------------------- */
typedef struct previous_version {
    PyObject_HEAD
    const char *name;
    const change_record* (*getrecord)(Py_UCS4);
    Py_UCS4 (*normalization)(Py_UCS4);
} PreviousDBVersion;

#include "clinic/unicodedata.c.h"

#define get_old_record(self, v)    ((((PreviousDBVersion*)self)->getrecord)(v))

static PyMemberDef DB_members[] = {
        {"unidata_version", T_STRING, offsetof(PreviousDBVersion, name), READONLY},
        {NULL}
};

/* forward declaration */
static PyTypeObject UCD_Type;
#define UCD_Check(o) (Py_TYPE(o)==&UCD_Type)

static PyObject*
new_previous_version(const char*name, const change_record* (*getrecord)(Py_UCS4),
                     Py_UCS4 (*normalization)(Py_UCS4))
{
        PreviousDBVersion *self;
        self = PyObject_New(PreviousDBVersion, &UCD_Type);
        if (self == NULL)
                return NULL;
        self->name = name;
        self->getrecord = getrecord;
        self->normalization = normalization;
        return (PyObject*)self;
}

/* --- Module API --------------------------------------------------------- */

/*[clinic input]
unicodedata.UCD.decimal

self: self
    chr: int(accept={str})
    default: object=NULL
    /

Converts a Unicode character into its equivalent decimal value.

Returns the decimal value assigned to the character chr as integer.
If no such value is defined, default is returned, or, if not given,
ValueError is raised.
[clinic start generated code]*/

static PyObject *
unicodedata_UCD_decimal_impl(PyObject *self, int chr,
                             PyObject *default_value)
/*[clinic end generated code: output=be23376e1a185231 input=933f8107993f23d0]*/
{
    int have_old = 0;
    long rc;
    Py_UCS4 c = (Py_UCS4)chr;

if (self && UCD_Check(self)) {
        const change_record *old = get_old_record(self, c);
        if (old->category_changed == 0) {
            /* unassigned */
            have_old = 1;
            rc = -1;
        }
        else if (old->decimal_changed != 0xFF) {
            have_old = 1;
            rc = old->decimal_changed;
        }
    }

if (!have_old)
        rc = Py_UNICODE_TODECIMAL(c);
    if (rc < 0) {
        if (default_value == NULL) {
            PyErr_SetString(PyExc_ValueError,
                            "not a decimal");
            return NULL;
        }
        else {
            Py_INCREF(default_value);
            return default_value;
        }
    }
    return PyLong_FromLong(rc);
}

/*[clinic input]
unicodedata.UCD.digit

self: self
    chr: int(accept={str})
    default: object=NULL
    /

Converts a Unicode character into its equivalent digit value.

Returns the digit value assigned to the character chr as integer.
If no such value is defined, default is returned, or, if not given,
ValueError is raised.
[clinic start generated code]*/

static PyObject *
unicodedata_UCD_digit_impl(PyObject *self, int chr, PyObject *default_value)
/*[clinic end generated code: output=96e18c950171fd2f input=e27d6e4565cd29f2]*/
{
    long rc;
    Py_UCS4 c = (Py_UCS4)chr;
    rc = Py_UNICODE_TODIGIT(c);
    if (rc < 0) {
        if (default_value == NULL) {
            PyErr_SetString(PyExc_ValueError, "not a digit");
            return NULL;
        }
        else {
            Py_INCREF(default_value);
            return default_value;
        }
    }
    return PyLong_FromLong(rc);
}

/*[clinic input]
unicodedata.UCD.numeric

self: self
    chr: int(accept={str})
    default: object=NULL
    /

Converts a Unicode character into its equivalent numeric value.

Returns the numeric value assigned to the character chr as float.
If no such value is defined, default is returned, or, if not given,
ValueError is raised.
[clinic start generated code]*/

static PyObject *
unicodedata_UCD_numeric_impl(PyObject *self, int chr,
                             PyObject *default_value)
/*[clinic end generated code: output=53ce281fe85b10c4 input=fdf5871a5542893c]*/
{
    int have_old = 0;
    double rc;
    Py_UCS4 c = (Py_UCS4)chr;

if (self && UCD_Check(self)) {
        const change_record *old = get_old_record(self, c);
        if (old->category_changed == 0) {
            /* unassigned */
            have_old = 1;
            rc = -1.0;
        }
        else if (old->decimal_changed != 0xFF) {
            have_old = 1;
            rc = old->decimal_changed;
        }
    }

if (!have_old)
        rc = Py_UNICODE_TONUMERIC(c);
    if (rc == -1.0) {
        if (default_value == NULL) {
            PyErr_SetString(PyExc_ValueError, "not a numeric character");
            return NULL;
        }
        else {
            Py_INCREF(default_value);
            return default_value;
        }
    }
    return PyFloat_FromDouble(rc);
}

/*[clinic input]
unicodedata.UCD.category