// C++ source  |  611 lines  |  20.56 KB

#include "XLIFFFile.h"

#include <algorithm>
#include <sys/time.h>
#include <time.h>
#include <cstdio>

// Namespace URI used for every XLIFF element and attribute created or looked
// up in this file.
const char* const XLIFF_XMLNS = "urn:oasis:names:tc:xliff:document:1.2";

// Table handed to XMLNamespaceMap below.  It is laid out as alternating
// entries -- "" with XLIFF_XMLNS, "xml" with XMLNS_XMLNS -- and ends with a
// NULL, NULL entry (presumably prefix/URI pairs with a NULL terminator;
// confirm against XMLNamespaceMap).
const char *const NS_MAP[] = {
    "", XLIFF_XMLNS,
    "xml", XMLNS_XMLNS,
    NULL, NULL
};

// Namespace map built from NS_MAP.  In this file it is passed to the XMLNode
// string-conversion calls and used to add namespace attributes to the <xliff>
// root element.
const XMLNamespaceMap XLIFF_NAMESPACES(NS_MAP);

// Three-way comparison of two File entries based solely on their filename.
// Returns 0 when the filenames are equal, -1 when this filename orders before
// that.filename, and 1 otherwise.
int
XLIFFFile::File::Compare(const XLIFFFile::File& that) const
{
    if (filename == that.filename) {
        return 0;
    }
    return (filename < that.filename) ? -1 : 1;
}

// =====================================================================================
// Default constructor.  The body is empty: no member is explicitly
// initialized here, so members get whatever their own default construction
// provides.
XLIFFFile::XLIFFFile()
{
}

// Destructor.  The body is empty: no explicit cleanup is performed here.
XLIFFFile::~XLIFFFile()
{
}

// Returns the single child element of `parent` named `name` in namespace `ns`.
//
// If `parent` does not contain exactly one such element, NULL is returned.
// When `required` is true an error is additionally reported: at the parent's
// position if the element is missing, or at the position of the second
// occurrence if there is more than one.
static XMLNode*
get_unique_node(const XMLNode* parent, const string& ns, const string& name, bool required)
{
    const size_t count = parent->CountElementsByName(ns, name);
    if (count == 1) {
        return parent->GetElementByNameAt(ns, name, 0);
    }

    if (required) {
        // The surplus element must be looked up with the same namespace that
        // was used for counting; a different namespace could fail to find it.
        SourcePos pos = count == 0
                            ? parent->Position()
                            : parent->GetElementByNameAt(ns, name, 1)->Position();
        pos.Error("<%s> elements must contain exactly one <%s> element",
                            parent->Name().c_str(), name.c_str());
    }
    return NULL;
}

// Reads the XLIFF document in `filename` and builds a new XLIFFFile from it.
//
// For every <file> element the source/target locales and the build number are
// recorded on the result, and each <trans-unit> in its <body> contributes up
// to four string resources: <source> and <target> (current version) and the
// <source>/<target> found inside <alt-trans> (old version).
//
// Malformed pieces (missing <body>, missing or invalid trans-unit id) are
// reported through SourcePos::Error and skipped; parsing then continues.
//
// Returns NULL if the file could not be parsed into an XML tree; otherwise
// returns a newly allocated XLIFFFile.
XLIFFFile*
XLIFFFile::Parse(const string& filename)
{
    // Parse first so that nothing has been allocated yet if this fails.
    XMLNode* root = NodeHandler::ParseFile(filename, XMLNode::PRETTY);
    if (root == NULL) {
        return NULL;
    }

    XLIFFFile* result = new XLIFFFile();

    // <file>
    vector<XMLNode*> files = root->GetElementsByName(XLIFF_XMLNS, "file");
    for (size_t i=0; i<files.size(); i++) {
        XMLNode* file = files[i];

        string originalFile = file->GetAttribute("", "original", "");

        // Each <file> overwrites the configs and version stored on the result.
        Configuration sourceConfig;
        sourceConfig.locale = file->GetAttribute("", "source-language", "");
        result->m_sourceConfig = sourceConfig;

        Configuration targetConfig;
        targetConfig.locale = file->GetAttribute("", "target-language", "");
        result->m_targetConfig = targetConfig;

        result->m_currentVersion = file->GetAttribute("", "build-num", "");
        result->m_oldVersion = "old";

        // <body>
        XMLNode* body = get_unique_node(file, XLIFF_XMLNS, "body", true);
        if (body == NULL) continue;

        // <trans-unit>
        vector<XMLNode*> transUnits = body->GetElementsByName(XLIFF_XMLNS, "trans-unit");
        for (size_t j=0; j<transUnits.size(); j++) {
            XMLNode* transUnit = transUnits[j];

            string rawID = transUnit->GetAttribute("", "id", "");
            if (rawID == "") {
                transUnit->Position().Error("<trans-unit> tag requires an id");
                continue;
            }
            string id;
            int index;

            if (!StringResource::ParseTypedID(rawID, &id, &index)) {
                transUnit->Position().Error("<trans-unit> has invalid id '%s'\n", rawID.c_str());
                continue;
            }

            // <source>
            XMLNode* source = get_unique_node(transUnit, XLIFF_XMLNS, "source", false);
            if (source != NULL) {
                // The tree under `root` is deleted below, so store a clone.
                XMLNode* node = source->Clone();
                node->SetPrettyRecursive(XMLNode::EXACT);
                result->AddStringResource(StringResource(source->Position(), originalFile,
                            sourceConfig, id, index, node, CURRENT_VERSION,
                            result->m_currentVersion));
            }

            // <target>
            XMLNode* target = get_unique_node(transUnit, XLIFF_XMLNS, "target", false);
            if (target != NULL) {
                XMLNode* node = target->Clone();
                node->SetPrettyRecursive(XMLNode::EXACT);
                result->AddStringResource(StringResource(target->Position(), originalFile,
                            targetConfig, id, index, node, CURRENT_VERSION,
                            result->m_currentVersion));
            }

            // <alt-trans>
            XMLNode* altTrans = get_unique_node(transUnit, XLIFF_XMLNS, "alt-trans", false);
            if (altTrans != NULL) {
                // <source>
                XMLNode* altSource = get_unique_node(altTrans, XLIFF_XMLNS, "source", false);
                if (altSource != NULL) {
                    XMLNode* node = altSource->Clone();
                    node->SetPrettyRecursive(XMLNode::EXACT);
                    result->AddStringResource(StringResource(altSource->Position(),
                                originalFile, sourceConfig, id, index, node, OLD_VERSION,
                                result->m_oldVersion));
                }

                // <target>
                XMLNode* altTarget = get_unique_node(altTrans, XLIFF_XMLNS, "target", false);
                if (altTarget != NULL) {
                    XMLNode* node = altTarget->Clone();
                    node->SetPrettyRecursive(XMLNode::EXACT);
                    result->AddStringResource(StringResource(altTarget->Position(),
                                originalFile, targetConfig, id, index, node, OLD_VERSION,
                                result->m_oldVersion));
                }
            }
        }
    }
    delete root;
    return result;
}

// Allocates an XLIFFFile with no files in it and stores the given source
// configuration, target configuration and current version on it.
// Returns the newly allocated object.
XLIFFFile*
XLIFFFile::Create(const Configuration& sourceConfig, const Configuration& targetConfig,
                                const string& currentVersion)
{
    XLIFFFile* const xliff = new XLIFFFile();

    xliff->m_sourceConfig = sourceConfig;
    xliff->m_targetConfig = targetConfig;
    xliff->m_currentVersion = currentVersion;

    return xliff;
}

// Returns the set of filenames of all File entries currently held.
set<string>
XLIFFFile::Files() const
{
    set<string> names;
    const size_t count = m_files.size();
    for (size_t i = 0; i < count; i++) {
        names.insert(m_files[i].filename);
    }
    return names;
}

void
XLIFFFile::AddStringResource(const StringResource& str)
{
    string id = str.TypedID();

    File* f = NULL;
    const size_t I = m_files.size();
    for (size_t i=0; i<I; i++) {
        if (m_files[i].filename == str.file) {
            f = &m_files[i];
            break;
        }
    }
    if (f == NULL) {
        File file;
        file.filename = str.file;
        m_files.push_back(file);
        f = &m_files[I];
    }

    const size_t J = f->transUnits.size();
    TransUnit* g = NULL;
    for (size_t j=0; j<J; j++) {
        if (f->transUnits[j].id == id) {
            g = &f->transUnits[j];
        }
    }
    if (g == NULL) {
        TransUnit group;
        group.id = id;
        f->transUnits.push_back(group);
        g = &f->transUnits[J];
    }

    StringResource* res = find_string_res(*g, str);
    if (res == NULL) {
        return ;
    }
    if (res->id != "") {
        str.pos.Error("Duplicate string resource: %s", res->id.c_str());
        res->pos.Error("Previous definition here");
        return ;
    }
    *res = str;

    m_strings.insert(str);
}

void
XLIFFFile::Filter(bool (*func)(const string&,const TransUnit&,void*), void* cookie)
{
    const size_t I = m_files.size();
    for (size_t ix=0, i=I-1; ix<I; ix++, i--) {
        File& file = m_files[i];

        const size_t J = file.transUnits.size();
        for (size_t jx=0, j=J-1; jx<J; jx++, j--) {
            TransUnit& tu = file.transUnits[j];

            bool keep = func(file.filename, tu, cookie);
            if (!keep) {
                if (tu.source.id != "") {
                    m_strings.erase(tu.source);
                }
                if (tu.target.id != "") {
                    m_strings.erase(tu.target);
                }
                if (tu.altSource.id != "") {
                    m_strings.erase(tu.altSource);
                }
                if (tu.altTarget.id != "") {
                    m_strings.erase(tu.altTarget);
                }
                file.transUnits.erase(file.transUnits.begin()+j);
            }
        }
        if (file.transUnits.size() == 0) {
            m_files.erase(m_files.begin()+i);
        }
    }
}

void
XLIFFFile::Map(void (*func)(const string&,TransUnit*,void*), void* cookie)
{
    const size_t I = m_files.size();
    for (size_t i=0; i<I; i++) {
        File& file = m_files[i];

        const size_t J = file.transUnits.size();
        for (size_t j=0; j<J; j++) {
            func(file.filename, &(file.transUnits[j]), cookie);
        }
    }
}

// Looks up the trans-unit with the given id inside the file with the given
// filename and returns a mutable pointer to it, or NULL if there is no match.
// Files and trans-units are searched from last to first, so when several
// entries match, the one stored last is returned.
TransUnit*
XLIFFFile::EditTransUnit(const string& filename, const string& id)
{
    for (size_t i = m_files.size(); i-- > 0; ) {
        File& file = m_files[i];
        if (!(file.filename == filename)) {
            continue;
        }

        vector<TransUnit>& units = file.transUnits;
        for (size_t j = units.size(); j-- > 0; ) {
            if (units[j].id == id) {
                return &units[j];
            }
        }
    }
    return NULL;
}

// Selects the slot of trans-unit `g` that `str` belongs in:
//
//                      source config    target config
//   CURRENT_VERSION    g.source         g.target
//   OLD_VERSION        g.altSource      g.altTarget
//
// The source config is checked before the target config.  Reports an error
// and returns NULL if the version is neither CURRENT_VERSION nor OLD_VERSION,
// or if the config matches neither m_sourceConfig nor m_targetConfig.
StringResource*
XLIFFFile::find_string_res(TransUnit& g, const StringResource& str)
{
    const bool isCurrent = (str.version == CURRENT_VERSION);
    if (!isCurrent && !(str.version == OLD_VERSION)) {
        str.pos.Error("Internal Error %s:%d\n", __FILE__, __LINE__);
        return NULL;
    }

    if (str.config == m_sourceConfig) {
        return isCurrent ? &g.source : &g.altSource;
    }
    if (str.config == m_targetConfig) {
        return isCurrent ? &g.target : &g.altTarget;
    }

    str.pos.Error("unknown config for string %s: %s", str.id.c_str(),
                        str.config.ToString().c_str());
    return NULL;
}

int
convert_html_to_xliff(const XMLNode* original, const string& name, XMLNode* addTo, int* phID)
{
    int err = 0;
    if (original->Type() == XMLNode::TEXT) {
        addTo->EditChildren().push_back(original->Clone());
        return 0;
    } else {
        string ctype;
        if (original->Namespace() == "") {
            if (original->Name() == "b") {
                ctype = "bold";
            }
            else if (original->Name() == "i") {
                ctype = "italic";
            }
            else if (original->Name() == "u") {
                ctype = "underline";
            }
        }
        if (ctype != "") {
            vector<XMLAttribute> attrs;
            attrs.push_back(XMLAttribute(XLIFF_XMLNS, "ctype", ctype));
            XMLNode* copy = XMLNode::NewElement(original->Position(), XLIFF_XMLNS, "g",
                                                attrs, XMLNode::EXACT);

            const vector<XMLNode*>& children = original->Children();
            size_t I = children.size();
            for (size_t i=0; i<I; i++) {
                err |= convert_html_to_xliff(children[i], name, copy, phID);
            }
            return err;
        }
        else {
            if (original->Namespace() == XLIFF_XMLNS) {
                addTo->EditChildren().push_back(original->Clone());
                return 0;
            } else {
                if (original->Namespace() == "") {
                    // flatten out the tag into ph tags -- but only if there is no namespace
                    // that's still unsupported because propagating the xmlns attribute is hard.
                    vector<XMLAttribute> attrs;
                    char idStr[30];
                    (*phID)++;
                    sprintf(idStr, "id-%d", *phID);
                    attrs.push_back(XMLAttribute(XLIFF_XMLNS, "id", idStr));

                    if (original->Children().size() == 0) {
                        XMLNode* ph = XMLNode::NewElement(original->Position(), XLIFF_XMLNS,
                                "ph", attrs, XMLNode::EXACT);
                        ph->EditChildren().push_back(
                                XMLNode::NewText(original->Position(),
                                    original->ToString(XLIFF_NAMESPACES),
                                    XMLNode::EXACT));
                        addTo->EditChildren().push_back(ph);
                    } else {
                        XMLNode* begin = XMLNode::NewElement(original->Position(), XLIFF_XMLNS,
                                "bpt", attrs, XMLNode::EXACT);
                        begin->EditChildren().push_back(
                                XMLNode::NewText(original->Position(),
                                    original->OpenTagToString(XLIFF_NAMESPACES, XMLNode::EXACT),
                                    XMLNode::EXACT));
                        XMLNode* end = XMLNode::NewElement(original->Position(), XLIFF_XMLNS,
                                "ept", attrs, XMLNode::EXACT);
                        string endText = "</";
                            endText += original->Name();
                            endText += ">";
                        end->EditChildren().push_back(XMLNode::NewText(original->Position(),
                                endText, XMLNode::EXACT));

                        addTo->EditChildren().push_back(begin);

                        const vector<XMLNode*>& children = original->Children();
                        size_t I = children.size();
                        for (size_t i=0; i<I; i++) {
                            err |= convert_html_to_xliff(children[i], name, addTo, phID);
                        }

                        addTo->EditChildren().push_back(end);
                    }
                    return err;
                } else {
                    original->Position().Error("invalid <%s> element in <%s> tag\n",
                                                original->Name().c_str(), name.c_str());
                    return 1;
                }
            }
        }
    }
}

// Builds the XLIFF element called `name` (e.g. "source" or "target") for the
// string resource `str`: an element carrying xml:space="preserve" whose
// children are the children of str.value converted by convert_html_to_xliff().
//
// Always returns a valid, newly created node.  If some child could not be
// converted, the error has already been reported by convert_html_to_xliff()
// and the returned node simply lacks the rejected content.  The node must not
// be deleted here on error, because callers insert the returned pointer into
// the tree unconditionally and would otherwise hold a dangling pointer.
XMLNode*
create_string_node(const StringResource& str, const string& name)
{
    vector<XMLAttribute> attrs;
    attrs.push_back(XMLAttribute(XMLNS_XMLNS, "space", "preserve"));
    XMLNode* node = XMLNode::NewElement(str.pos, XLIFF_XMLNS, name, attrs, XMLNode::EXACT);

    const vector<XMLNode*>& children = str.value->Children();
    const size_t I = children.size();
    for (size_t i=0; i<I; i++) {
        // NOTE(review): the placeholder counter restarts for every top-level
        // child, so "id-N" values can repeat within one element -- confirm
        // whether that is intended before changing it.
        int phID = 0;
        // The status is intentionally not acted upon; see the header comment.
        convert_html_to_xliff(children[i], name, node, &phID);
    }

    return node;
}

static bool
compare_id(const TransUnit& lhs, const TransUnit& rhs)
{
    string lid, rid;
    int lindex, rindex;
    StringResource::ParseTypedID(lhs.id, &lid, &lindex);
    StringResource::ParseTypedID(rhs.id, &rid, &rindex);
    if (lid < rid) return true;
    if (lid == rid && lindex < rindex) return true;
    return false;
}

// Builds the XML tree for this XLIFF document and returns its <xliff> root.
// The tree is newly created here and handed to the caller.
//
// Layout of the generated tree:
//   <xliff version="1.2">
//     <file datatype original date source-language target-language build-num>
//       <body>
//         <trans-unit id>
//           <extradata>   source comment, if any
//           <source>      if present
//           <target>      if present
//           <alt-trans>   if there is an old source/target or a reject comment
//             <extradata> reject comment, if any
//             <source>    old source, if present
//             <target>    old target, if present
//
// Files are emitted in sorted order and trans-units are sorted with
// compare_id(); sorting is done on copies, so this object is not modified.
XMLNode*
XLIFFFile::ToXMLNode() const
{
    XMLNode* root;

    // <xliff>
    {
        vector<XMLAttribute> attrs;
        XLIFF_NAMESPACES.AddToAttributes(&attrs);
        attrs.push_back(XMLAttribute(XLIFF_XMLNS, "version", "1.2"));
        root = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "xliff", attrs, XMLNode::PRETTY);
    }

    // The generation date does not depend on the file, so compute it once and
    // stamp every <file> with the same value.
    time_t now = time(NULL);
    const string date = trim_string(ctime(&now));

    // <file>
    vector<File> files = m_files;
    sort(files.begin(), files.end());
    const size_t I = files.size();
    for (size_t i=0; i<I; i++) {
        const File& file = files[i];

        vector<XMLAttribute> fileAttrs;
        fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "datatype", "x-android-res"));
        fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "original", file.filename));
        fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "date", date));
        fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "source-language", m_sourceConfig.locale));
        fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "target-language", m_targetConfig.locale));
        fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "build-num", m_currentVersion));

        XMLNode* fileNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "file", fileAttrs,
                                                XMLNode::PRETTY);
        root->EditChildren().push_back(fileNode);

        // <body>
        XMLNode* bodyNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "body",
                                                vector<XMLAttribute>(), XMLNode::PRETTY);
        fileNode->EditChildren().push_back(bodyNode);

        // <trans-unit>
        vector<TransUnit> transUnits = file.transUnits;
        sort(transUnits.begin(), transUnits.end(), compare_id);
        const size_t J = transUnits.size();
        for (size_t j=0; j<J; j++) {
            const TransUnit& transUnit = transUnits[j];

            vector<XMLAttribute> tuAttrs;

            // strings start with string:
            tuAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "id", transUnit.id));
            XMLNode* transUnitNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "trans-unit",
                                                         tuAttrs, XMLNode::PRETTY);
            bodyNode->EditChildren().push_back(transUnitNode);

            // <extradata>
            if (transUnit.source.comment != "") {
                vector<XMLAttribute> extradataAttrs;
                XMLNode* extraNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "extradata",
                                                            extradataAttrs, XMLNode::EXACT);
                transUnitNode->EditChildren().push_back(extraNode);
                extraNode->EditChildren().push_back(
                        XMLNode::NewText(GENERATED_POS, transUnit.source.comment,
                                         XMLNode::PRETTY));
            }

            // <source>
            if (transUnit.source.id != "") {
                transUnitNode->EditChildren().push_back(
                                    create_string_node(transUnit.source, "source"));
            }

            // <target>
            if (transUnit.target.id != "") {
                transUnitNode->EditChildren().push_back(
                                    create_string_node(transUnit.target, "target"));
            }

            // <alt-trans>
            if (transUnit.altSource.id != "" || transUnit.altTarget.id != ""
                    || transUnit.rejectComment != "") {
                vector<XMLAttribute> altTransAttrs;
                XMLNode* altTransNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "alt-trans",
                                                            altTransAttrs, XMLNode::PRETTY);
                transUnitNode->EditChildren().push_back(altTransNode);

                // <extradata>
                if (transUnit.rejectComment != "") {
                    vector<XMLAttribute> extradataAttrs;
                    XMLNode* extraNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS,
                                                                "extradata", extradataAttrs,
                                                                XMLNode::EXACT);
                    altTransNode->EditChildren().push_back(extraNode);
                    extraNode->EditChildren().push_back(
                            XMLNode::NewText(GENERATED_POS, transUnit.rejectComment,
                                             XMLNode::PRETTY));
                }

                // <source>
                if (transUnit.altSource.id != "") {
                    altTransNode->EditChildren().push_back(
                                        create_string_node(transUnit.altSource, "source"));
                }

                // <target>
                if (transUnit.altTarget.id != "") {
                    altTransNode->EditChildren().push_back(
                                        create_string_node(transUnit.altTarget, "target"));
                }
            }
        }
    }

    return root;
}


// Serializes the document to text: an XML declaration line, the tree produced
// by ToXMLNode() rendered with XLIFF_NAMESPACES, and a trailing newline.
// The temporary tree is deleted before returning.
string
XLIFFFile::ToString() const
{
    XMLNode* const tree = ToXMLNode();

    string text("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n");
    text.append(tree->ToString(XLIFF_NAMESPACES));
    delete tree;

    text.push_back('\n');
    return text;
}

// Gathers counts for this document into a Stats record labelled `config`:
//   files          - number of File entries
//   toBeTranslated - total number of trans-units across all files
//   noComments     - trans-units whose source string has an empty comment
//   totalStrings   - set equal to toBeTranslated
Stats
XLIFFFile::GetStats(const string& config) const
{
    Stats stat;
    stat.config = config;
    stat.files = m_files.size();
    stat.toBeTranslated = 0;
    stat.noComments = 0;

    const size_t fileCount = m_files.size();
    for (size_t i = 0; i < fileCount; i++) {
        const vector<TransUnit>& units = m_files[i].transUnits;
        const size_t unitCount = units.size();

        stat.toBeTranslated += unitCount;

        for (size_t j = 0; j < unitCount; j++) {
            if (units[j].source.comment == "") {
                stat.noComments++;
            }
        }
    }

    stat.totalStrings = stat.toBeTranslated;

    return stat;
}