C++程序  |  216行  |  6.63 KB

#include "Python.h"
#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
typedef int Py_ssize_t;
#define PY_SSIZE_T_MAX INT_MAX
#define PY_SSIZE_T_MIN INT_MIN
#endif

static Py_ssize_t
ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
static PyObject *
ascii_escape_unicode(PyObject *pystr);
static PyObject *
ascii_escape_str(PyObject *pystr);
static PyObject *
py_encode_basestring_ascii(PyObject* self __attribute__((__unused__)), PyObject *pystr);
void init_speedups(void);

#define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '/' && c != '"')

#define MIN_EXPANSION 6
#ifdef Py_UNICODE_WIDE
#define MAX_EXPANSION (2 * MIN_EXPANSION)
#else
#define MAX_EXPANSION MIN_EXPANSION
#endif

static Py_ssize_t
ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars) {
    Py_UNICODE x;
    output[chars++] = '\\';
    switch (c) {
        case '/': output[chars++] = (char)c; break;
        case '\\': output[chars++] = (char)c; break;
        case '"': output[chars++] = (char)c; break;
        case '\b': output[chars++] = 'b'; break;
        case '\f': output[chars++] = 'f'; break;
        case '\n': output[chars++] = 'n'; break;
        case '\r': output[chars++] = 'r'; break;
        case '\t': output[chars++] = 't'; break;
        default:
#ifdef Py_UNICODE_WIDE
            if (c >= 0x10000) {
                /* UTF-16 surrogate pair */
                Py_UNICODE v = c - 0x10000;
                c = 0xd800 | ((v >> 10) & 0x3ff);
                output[chars++] = 'u';
                x = (c & 0xf000) >> 12;
                output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
                x = (c & 0x0f00) >> 8;
                output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
                x = (c & 0x00f0) >> 4;
                output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
                x = (c & 0x000f);
                output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
                c = 0xdc00 | (v & 0x3ff);
                output[chars++] = '\\';
            }
#endif
            output[chars++] = 'u';
            x = (c & 0xf000) >> 12;
            output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
            x = (c & 0x0f00) >> 8;
            output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
            x = (c & 0x00f0) >> 4;
            output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
            x = (c & 0x000f);
            output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
    }
    return chars;
}

static PyObject *
ascii_escape_unicode(PyObject *pystr) {
    Py_ssize_t i;
    Py_ssize_t input_chars;
    Py_ssize_t output_size;
    Py_ssize_t chars;
    PyObject *rval;
    char *output;
    Py_UNICODE *input_unicode;

    input_chars = PyUnicode_GET_SIZE(pystr);
    input_unicode = PyUnicode_AS_UNICODE(pystr);
    /* One char input can be up to 6 chars output, estimate 4 of these */
    output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
    rval = PyString_FromStringAndSize(NULL, output_size);
    if (rval == NULL) {
        return NULL;
    }
    output = PyString_AS_STRING(rval);
    chars = 0;
    output[chars++] = '"';
    for (i = 0; i < input_chars; i++) {
        Py_UNICODE c = input_unicode[i];
        if (S_CHAR(c)) {
            output[chars++] = (char)c;
        } else {
            chars = ascii_escape_char(c, output, chars);
        }
        if (output_size - chars < (1 + MAX_EXPANSION)) {
            /* There's more than four, so let's resize by a lot */
            output_size *= 2;
            /* This is an upper bound */
            if (output_size > 2 + (input_chars * MAX_EXPANSION)) {
                output_size = 2 + (input_chars * MAX_EXPANSION);
            }
            if (_PyString_Resize(&rval, output_size) == -1) {
                return NULL;
            }
            output = PyString_AS_STRING(rval);
        }
    }
    output[chars++] = '"';
    if (_PyString_Resize(&rval, chars) == -1) {
        return NULL;
    }
    return rval;
}

static PyObject *
ascii_escape_str(PyObject *pystr) {
    Py_ssize_t i;
    Py_ssize_t input_chars;
    Py_ssize_t output_size;
    Py_ssize_t chars;
    PyObject *rval;
    char *output;
    char *input_str;

    input_chars = PyString_GET_SIZE(pystr);
    input_str = PyString_AS_STRING(pystr);
    /* One char input can be up to 6 chars output, estimate 4 of these */
    output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
    rval = PyString_FromStringAndSize(NULL, output_size);
    if (rval == NULL) {
        return NULL;
    }
    output = PyString_AS_STRING(rval);
    chars = 0;
    output[chars++] = '"';
    for (i = 0; i < input_chars; i++) {
        Py_UNICODE c = (Py_UNICODE)input_str[i];
        if (S_CHAR(c)) {
            output[chars++] = (char)c;
        } else if (c > 0x7F) {
            /* We hit a non-ASCII character, bail to unicode mode */
            PyObject *uni;
            Py_DECREF(rval);
            uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
            if (uni == NULL) {
                return NULL;
            }
            rval = ascii_escape_unicode(uni);
            Py_DECREF(uni);
            return rval;
        } else {
            chars = ascii_escape_char(c, output, chars);
        }
        /* An ASCII char can't possibly expand to a surrogate! */
        if (output_size - chars < (1 + MIN_EXPANSION)) {
            /* There's more than four, so let's resize by a lot */
            output_size *= 2;
            if (output_size > 2 + (input_chars * MIN_EXPANSION)) {
                output_size = 2 + (input_chars * MIN_EXPANSION);
            }
            if (_PyString_Resize(&rval, output_size) == -1) {
                return NULL;
            }
            output = PyString_AS_STRING(rval);
        }
    }
    output[chars++] = '"';
    if (_PyString_Resize(&rval, chars) == -1) {
        return NULL;
    }
    return rval;
}

PyDoc_STRVAR(pydoc_encode_basestring_ascii,
    "encode_basestring_ascii(basestring) -> str\n"
    "\n"
    "..."
);

static PyObject *
py_encode_basestring_ascii(PyObject* self __attribute__((__unused__)), PyObject *pystr) {
    /* METH_O */
    if (PyString_Check(pystr)) {
        return ascii_escape_str(pystr);
    } else if (PyUnicode_Check(pystr)) {
        return ascii_escape_unicode(pystr);
    }
    PyErr_SetString(PyExc_TypeError, "first argument must be a string");
    return NULL;
}

#define DEFN(n, k) \
    {  \
        #n, \
        (PyCFunction)py_ ##n, \
        k, \
        pydoc_ ##n \
    }
static PyMethodDef speedups_methods[] = {
    DEFN(encode_basestring_ascii, METH_O),
    {}
};
#undef DEFN

void
init_speedups(void)
{
    PyObject *m;
    m = Py_InitModule4("_speedups", speedups_methods, NULL, NULL, PYTHON_API_VERSION);
}