C++程序  |  829行  |  22.45 KB

/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <stdio.h>
#include <string.h>
#include <assert.h>
#include "../include/dictdef.h"

#ifdef ___BUILD_MODEL___
#include "../include/spellingtable.h"
#endif

#include "../include/spellingtrie.h"

namespace ime_pinyin {

SpellingTrie* SpellingTrie::instance_ = NULL;

// z/c/s is for Zh/Ch/Sh
const char SpellingTrie::kHalfId2Sc_[kFullSplIdStart + 1] =
    "0ABCcDEFGHIJKLMNOPQRSsTUVWXYZz";

// Bit 0 : is it a Shengmu char?
// Bit 1 : is it a Yunmu char? (one char is a Yunmu)
// Bit 2 : is it enabled in ShouZiMu(first char) mode?
unsigned char SpellingTrie::char_flags_[] = {
  // a    b      c     d     e     f     g
  0x02, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01,
  // h    i     j      k     l     m    n
  0x01, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01,
  // o    p     q      r     s     t
  0x02, 0x01, 0x01, 0x01, 0x01, 0x01,
  // u    v     w      x     y     z
  0x00, 0x00, 0x01, 0x01, 0x01, 0x01
};

int compare_spl(const void* p1, const void* p2) {
  return strcmp((const char*)(p1), (const char*)(p2));
}

SpellingTrie::SpellingTrie() {
  spelling_buf_ = NULL;
  spelling_size_ = 0;
  spelling_num_ = 0;
  spl_ym_ids_ = NULL;
  splstr_queried_ = NULL;
  splstr16_queried_ = NULL;
  root_ = NULL;
  dumb_node_ = NULL;
  splitter_node_ = NULL;
  instance_ = NULL;
  ym_buf_ = NULL;
  f2h_ = NULL;

  szm_enable_shm(true);
  szm_enable_ym(true);

#ifdef ___BUILD_MODEL___
  node_num_ = 0;
#endif
}

SpellingTrie::~SpellingTrie() {
  if (NULL != spelling_buf_)
    delete [] spelling_buf_;

  if (NULL != splstr_queried_)
    delete [] splstr_queried_;

  if (NULL != splstr16_queried_)
    delete [] splstr16_queried_;

  if (NULL != spl_ym_ids_)
    delete [] spl_ym_ids_;

  if (NULL != root_) {
    free_son_trie(root_);
    delete root_;
  }

  if (NULL != dumb_node_) {
    delete [] dumb_node_;
  }

  if (NULL != splitter_node_) {
    delete [] splitter_node_;
  }

  if (NULL != instance_) {
    delete instance_;
    instance_ = NULL;
  }

  if (NULL != ym_buf_)
    delete [] ym_buf_;

  if (NULL != f2h_)
    delete [] f2h_;
}

bool SpellingTrie::if_valid_id_update(uint16 *splid) const {
  if (NULL == splid || 0 == *splid)
    return false;

  if (*splid >= kFullSplIdStart)
    return true;
  if (*splid < kFullSplIdStart) {
    char ch = kHalfId2Sc_[*splid];
    if (ch > 'Z') {
      return true;
    } else {
      if (szm_is_enabled(ch)) {
        return true;
      } else if (is_yunmu_char(ch)) {
        assert(h2f_num_[*splid] > 0);
        *splid = h2f_start_[*splid];
        return true;
      }
    }
  }
  return false;
}

bool SpellingTrie::is_half_id(uint16 splid) const {
  if (0 == splid || splid >= kFullSplIdStart)
    return false;

  return true;
}

bool SpellingTrie::is_full_id(uint16 splid) const {
  if (splid < kFullSplIdStart || splid >= kFullSplIdStart + spelling_num_)
    return false;
  return true;
}

bool SpellingTrie::half_full_compatible(uint16 half_id, uint16 full_id) const {
  uint16 half_fr_full = full_to_half(full_id);

  if (half_fr_full == half_id)
    return true;

  // &~0x20 is used to conver the char to upper case.
  // So that Zh/Ch/Sh(whose char is z/c/s) can be matched with Z/C/S.
  char ch_f = (kHalfId2Sc_[half_fr_full] & (~0x20));
  char ch_h = kHalfId2Sc_[half_id];
  if (ch_f == ch_h)
    return true;

  return false;
}

bool SpellingTrie::is_half_id_yunmu(uint16 splid) const {
  if (0 == splid || splid >= kFullSplIdStart)
    return false;

  char ch = kHalfId2Sc_[splid];
  // If ch >= 'a', that means the half id is one of Zh/Ch/Sh
  if (ch >= 'a') {
    return false;
  }

  return char_flags_[ch - 'A'] & kHalfIdYunmuMask;
}

bool SpellingTrie::is_shengmu_char(char ch) const {
  return char_flags_[ch - 'A'] & kHalfIdShengmuMask;
}

bool SpellingTrie::is_yunmu_char(char ch) const {
  return char_flags_[ch - 'A'] & kHalfIdYunmuMask;
}

bool SpellingTrie::is_szm_char(char ch) const {
  return is_shengmu_char(ch) || is_yunmu_char(ch);
}

bool SpellingTrie::szm_is_enabled(char ch) const {
  return char_flags_[ch - 'A'] & kHalfIdSzmMask;
}

void SpellingTrie::szm_enable_shm(bool enable) {
  if (enable) {
    for (char ch = 'A'; ch <= 'Z'; ch++) {
      if (is_shengmu_char(ch))
        char_flags_[ch - 'A'] = char_flags_[ch - 'A'] | kHalfIdSzmMask;
    }
  } else {
    for (char ch = 'A'; ch <= 'Z'; ch++) {
      if (is_shengmu_char(ch))
        char_flags_[ch - 'A'] = char_flags_[ch - 'A'] & (kHalfIdSzmMask ^ 0xff);
    }
  }
}

void SpellingTrie::szm_enable_ym(bool enable) {
  if (enable) {
    for (char ch = 'A'; ch <= 'Z'; ch++) {
      if (is_yunmu_char(ch))
        char_flags_[ch - 'A'] = char_flags_[ch - 'A'] | kHalfIdSzmMask;
    }
  } else {
    for (char ch = 'A'; ch <= 'Z'; ch++) {
      if (is_yunmu_char(ch))
        char_flags_[ch - 'A'] = char_flags_[ch - 'A'] & (kHalfIdSzmMask ^ 0xff);
    }
  }
}

bool SpellingTrie::is_szm_enabled(char ch) const {
  return char_flags_[ch - 'A'] & kHalfIdSzmMask;
}

const SpellingTrie* SpellingTrie::get_cpinstance() {
  return &get_instance();
}

SpellingTrie& SpellingTrie::get_instance() {
  if (NULL == instance_)
    instance_ = new SpellingTrie();

  return *instance_;
}

uint16 SpellingTrie::half2full_num(uint16 half_id) const {
  if (NULL == root_ || half_id >= kFullSplIdStart)
    return 0;
  return h2f_num_[half_id];
}

uint16 SpellingTrie::half_to_full(uint16 half_id, uint16 *spl_id_start) const {
  if (NULL == spl_id_start || NULL == root_ || half_id >= kFullSplIdStart)
    return 0;

  *spl_id_start = h2f_start_[half_id];
  return h2f_num_[half_id];
}

uint16 SpellingTrie::full_to_half(uint16 full_id) const {
  if (NULL == root_ || full_id < kFullSplIdStart ||
      full_id > spelling_num_ + kFullSplIdStart)
    return 0;

  return f2h_[full_id - kFullSplIdStart];
}

void SpellingTrie::free_son_trie(SpellingNode* node) {
  if (NULL == node)
    return;

  for (size_t pos = 0; pos < node->num_of_son; pos++) {
    free_son_trie(node->first_son + pos);
  }

  if (NULL != node->first_son)
    delete [] node->first_son;
}

bool SpellingTrie::construct(const char* spelling_arr, size_t item_size,
                             size_t item_num, float score_amplifier,
                             unsigned char average_score) {
  if (spelling_arr == NULL)
    return false;

  memset(h2f_start_, 0, sizeof(uint16) * kFullSplIdStart);
  memset(h2f_num_, 0, sizeof(uint16) * kFullSplIdStart);

  // If the arr is the same as the buf, means this function is called by
  // load_table(), the table data are ready; otherwise the array should be
  // saved.
  if (spelling_arr != spelling_buf_) {
    if (NULL != spelling_buf_)
      delete [] spelling_buf_;
    spelling_buf_ = new char[item_size * item_num];
    if (NULL == spelling_buf_)
      return false;
    memcpy(spelling_buf_, spelling_arr, sizeof(char) * item_size * item_num);
  }

  spelling_size_ = item_size;
  spelling_num_ = item_num;

  score_amplifier_ = score_amplifier;
  average_score_ = average_score;

  if (NULL != splstr_queried_)
    delete [] splstr_queried_;
  splstr_queried_ = new char[spelling_size_];
  if (NULL == splstr_queried_)
    return false;

  if (NULL != splstr16_queried_)
    delete [] splstr16_queried_;
  splstr16_queried_ = new char16[spelling_size_];
  if (NULL == splstr16_queried_)
    return false;

  // First, sort the buf to ensure they are in ascendant order
  qsort(spelling_buf_, spelling_num_, spelling_size_, compare_spl);

#ifdef ___BUILD_MODEL___
  node_num_ = 1;
#endif

  root_ = new SpellingNode();
  memset(root_, 0, sizeof(SpellingNode));

  dumb_node_ = new SpellingNode();
  memset(dumb_node_, 0, sizeof(SpellingNode));
  dumb_node_->score = average_score_;

  splitter_node_ = new SpellingNode();
  memset(splitter_node_, 0, sizeof(SpellingNode));
  splitter_node_->score = average_score_;

  memset(level1_sons_, 0, sizeof(SpellingNode*) * kValidSplCharNum);

  root_->first_son = construct_spellings_subset(0, spelling_num_, 0, root_);

  // Root's score should be cleared.
  root_->score = 0;

  if (NULL == root_->first_son)
    return false;

  h2f_start_[0] = h2f_num_[0] = 0;

  if (!build_f2h())
    return false;

#ifdef ___BUILD_MODEL___
  if (kPrintDebug0) {
    printf("---SpellingTrie Nodes: %d\n", node_num_);
  }
  return build_ym_info();
#else
  return true;
#endif
}

#ifdef ___BUILD_MODEL___
const char* SpellingTrie::get_ym_str(const char *spl_str) {
  bool start_ZCS = false;
  if (is_shengmu_char(*spl_str)) {
    if ('Z' == *spl_str || 'C' == *spl_str || 'S' == *spl_str)
      start_ZCS = true;
    spl_str += 1;
    if (start_ZCS && 'h' == *spl_str)
      spl_str += 1;
  }
  return spl_str;
}

bool SpellingTrie::build_ym_info() {
  bool sucess;
  SpellingTable *spl_table = new SpellingTable();

  sucess = spl_table->init_table(kMaxPinyinSize - 1, 2 * kMaxYmNum, false);
  assert(sucess);

  for (uint16 pos = 0; pos < spelling_num_; pos++) {
    const char *spl_str = spelling_buf_ + spelling_size_ * pos;
    spl_str = get_ym_str(spl_str);
    if ('\0' != spl_str[0]) {
      sucess = spl_table->put_spelling(spl_str, 0);
      assert(sucess);
    }
  }

  size_t ym_item_size;  // '\0' is included
  size_t ym_num;
  const char* ym_buf;
  ym_buf = spl_table->arrange(&ym_item_size, &ym_num);

  if (NULL != ym_buf_)
    delete [] ym_buf_;
  ym_buf_ = new char[ym_item_size * ym_num];
  if (NULL == ym_buf_) {
    delete spl_table;
    return false;
  }

  memcpy(ym_buf_, ym_buf, sizeof(char) * ym_item_size * ym_num);
  ym_size_ = ym_item_size;
  ym_num_ = ym_num;

  delete spl_table;

  // Generate the maping from the spelling ids to the Yunmu ids.
  if (spl_ym_ids_)
    delete spl_ym_ids_;
  spl_ym_ids_ = new uint8[spelling_num_ + kFullSplIdStart];
  if (NULL == spl_ym_ids_)
    return false;

  memset(spl_ym_ids_, 0, sizeof(uint8) * (spelling_num_ + kFullSplIdStart));

  for (uint16 id = 1; id < spelling_num_ + kFullSplIdStart; id++) {
    const char *str = get_spelling_str(id);

    str = get_ym_str(str);
    if ('\0' != str[0]) {
      uint8 ym_id = get_ym_id(str);
      spl_ym_ids_[id] = ym_id;
      assert(ym_id > 0);
    } else {
      spl_ym_ids_[id] = 0;
    }
  }
  return true;
}
#endif

SpellingNode* SpellingTrie::construct_spellings_subset(
    size_t item_start, size_t item_end, size_t level, SpellingNode* parent) {
  if (level >= spelling_size_ || item_end <= item_start || NULL == parent)
    return NULL;

  SpellingNode *first_son = NULL;
  uint16 num_of_son = 0;
  unsigned char min_son_score = 255;

  const char *spelling_last_start = spelling_buf_ + spelling_size_ * item_start;
  char char_for_node = spelling_last_start[level];
  assert(char_for_node >= 'A' && char_for_node <= 'Z' ||
         'h' == char_for_node);

  // Scan the array to find how many sons
  for (size_t i = item_start + 1; i < item_end; i++) {
    const char *spelling_current = spelling_buf_ + spelling_size_ * i;
    char char_current = spelling_current[level];
    if (char_current != char_for_node) {
      num_of_son++;
      char_for_node = char_current;
    }
  }
  num_of_son++;

  // Allocate memory
#ifdef ___BUILD_MODEL___
  node_num_ += num_of_son;
#endif
  first_son = new SpellingNode[num_of_son];
  memset(first_son, 0, sizeof(SpellingNode)*num_of_son);

  // Now begin construct tree
  size_t son_pos = 0;

  spelling_last_start = spelling_buf_ + spelling_size_ * item_start;
  char_for_node = spelling_last_start[level];

  bool spelling_endable = true;
  if (spelling_last_start[level + 1] != '\0')
    spelling_endable = false;

  size_t item_start_next = item_start;

  for (size_t i = item_start + 1; i < item_end; i++) {
    const char *spelling_current = spelling_buf_ + spelling_size_ * i;
    char char_current = spelling_current[level];
    assert(is_valid_spl_char(char_current));

    if (char_current != char_for_node) {
      // Construct a node
      SpellingNode *node_current = first_son + son_pos;
      node_current->char_this_node = char_for_node;

      // For quick search in the first level
      if (0 == level)
        level1_sons_[char_for_node - 'A'] = node_current;

      if (spelling_endable) {
        node_current->spelling_idx = kFullSplIdStart + item_start_next;
      }

      if (spelling_last_start[level + 1] != '\0' || i - item_start_next > 1) {
        size_t real_start = item_start_next;
        if (spelling_last_start[level + 1] == '\0')
          real_start++;

        node_current->first_son =
            construct_spellings_subset(real_start, i, level + 1,
                                       node_current);

        if (real_start == item_start_next + 1) {
          uint16 score_this = static_cast<unsigned char>(
              spelling_last_start[spelling_size_ - 1]);
          if (score_this < node_current->score)
            node_current->score = score_this;
        }
      } else {
        node_current->first_son = NULL;
        node_current->score = static_cast<unsigned char>(
            spelling_last_start[spelling_size_ - 1]);
      }

      if (node_current->score < min_son_score)
        min_son_score = node_current->score;

      bool is_half = false;
      if (level == 0 && is_szm_char(char_for_node)) {
        node_current->spelling_idx =
          static_cast<uint16>(char_for_node - 'A' + 1);

        if (char_for_node > 'C')
          node_current->spelling_idx++;
        if (char_for_node > 'S')
          node_current->spelling_idx++;

        h2f_num_[node_current->spelling_idx] = i - item_start_next;
        is_half = true;
      } else if (level == 1 && char_for_node == 'h') {
        char ch_level0 = spelling_last_start[0];
        uint16 part_id = 0;
        if (ch_level0 == 'C')
          part_id = 'C' - 'A' + 1 + 1;
        else if (ch_level0 == 'S')
          part_id = 'S' - 'A' + 1 + 2;
        else if (ch_level0 == 'Z')
          part_id = 'Z' - 'A' + 1 + 3;
        if (0 != part_id) {
          node_current->spelling_idx = part_id;
          h2f_num_[node_current->spelling_idx] = i - item_start_next;
          is_half = true;
        }
      }

      if (is_half) {
        if (h2f_num_[node_current->spelling_idx] > 0)
          h2f_start_[node_current->spelling_idx] =
            item_start_next + kFullSplIdStart;
        else
          h2f_start_[node_current->spelling_idx] = 0;
      }

      // for next sibling
      spelling_last_start = spelling_current;
      char_for_node = char_current;
      item_start_next = i;
      spelling_endable = true;
      if (spelling_current[level + 1] != '\0')
        spelling_endable = false;

      son_pos++;
    }
  }

  // the last one
  SpellingNode *node_current = first_son + son_pos;
  node_current->char_this_node = char_for_node;

  // For quick search in the first level
  if (0 == level)
    level1_sons_[char_for_node - 'A'] = node_current;

  if (spelling_endable) {
    node_current->spelling_idx = kFullSplIdStart + item_start_next;
  }

  if (spelling_last_start[level + 1] != '\0' ||
      item_end - item_start_next > 1) {
    size_t real_start = item_start_next;
    if (spelling_last_start[level + 1] == '\0')
      real_start++;

    node_current->first_son =
        construct_spellings_subset(real_start, item_end, level + 1,
                                   node_current);

    if (real_start == item_start_next + 1) {
      uint16 score_this = static_cast<unsigned char>(
          spelling_last_start[spelling_size_ - 1]);
      if (score_this < node_current->score)
        node_current->score = score_this;
    }
  } else {
    node_current->first_son = NULL;
    node_current->score = static_cast<unsigned char>(
        spelling_last_start[spelling_size_ - 1]);
  }

  if (node_current->score < min_son_score)
    min_son_score = node_current->score;

  assert(son_pos + 1 == num_of_son);

  bool is_half = false;
  if (level == 0 && szm_is_enabled(char_for_node)) {
    node_current->spelling_idx = static_cast<uint16>(char_for_node - 'A' + 1);

    if (char_for_node > 'C')
      node_current->spelling_idx++;
    if (char_for_node > 'S')
      node_current->spelling_idx++;

    h2f_num_[node_current->spelling_idx] = item_end - item_start_next;
    is_half = true;
  } else if (level == 1 && char_for_node == 'h') {
    char ch_level0 = spelling_last_start[0];
    uint16 part_id = 0;
    if (ch_level0 == 'C')
      part_id = 'C' - 'A' + 1 + 1;
    else if (ch_level0 == 'S')
      part_id = 'S' - 'A' + 1 + 2;
    else if (ch_level0 == 'Z')
      part_id = 'Z' - 'A' + 1 + 3;
    if (0 != part_id) {
      node_current->spelling_idx = part_id;
      h2f_num_[node_current->spelling_idx] = item_end - item_start_next;
      is_half = true;
    }
  }
  if (is_half) {
    if (h2f_num_[node_current->spelling_idx] > 0)
      h2f_start_[node_current->spelling_idx] =
        item_start_next + kFullSplIdStart;
    else
      h2f_start_[node_current->spelling_idx] = 0;
  }

  parent->num_of_son = num_of_son;
  parent->score = min_son_score;
  return first_son;
}

bool SpellingTrie::save_spl_trie(FILE *fp) {
  if (NULL == fp || NULL == spelling_buf_)
    return false;

  if (fwrite(&spelling_size_, sizeof(size_t), 1, fp) != 1)
    return false;

  if (fwrite(&spelling_num_, sizeof(size_t), 1, fp) != 1)
    return false;

  if (fwrite(&score_amplifier_, sizeof(float), 1, fp) != 1)
    return false;

  if (fwrite(&average_score_, sizeof(unsigned char), 1, fp) != 1)
    return false;

  if (fwrite(spelling_buf_, sizeof(char) * spelling_size_,
             spelling_num_, fp) != spelling_num_)
    return false;

  return true;
}

bool SpellingTrie::load_spl_trie(FILE *fp) {
  if (NULL == fp)
    return false;

  if (fread(&spelling_size_, sizeof(size_t), 1, fp) != 1)
    return false;

  if (fread(&spelling_num_, sizeof(size_t), 1, fp) != 1)
    return false;

  if (fread(&score_amplifier_, sizeof(float), 1, fp) != 1)
    return false;

  if (fread(&average_score_, sizeof(unsigned char), 1, fp) != 1)
    return false;

  if (NULL != spelling_buf_)
    delete [] spelling_buf_;

  spelling_buf_ = new char[spelling_size_ * spelling_num_];
  if (NULL == spelling_buf_)
    return false;

  if (fread(spelling_buf_, sizeof(char) * spelling_size_,
            spelling_num_, fp) != spelling_num_)
    return false;

  return construct(spelling_buf_, spelling_size_, spelling_num_,
                   score_amplifier_, average_score_);
}

bool SpellingTrie::build_f2h() {
  if (NULL != f2h_)
    delete [] f2h_;
  f2h_ = new uint16[spelling_num_];
  if (NULL == f2h_)
    return false;

  for (uint16 hid = 0; hid < kFullSplIdStart; hid++) {
    for (uint16 fid = h2f_start_[hid];
         fid < h2f_start_[hid] + h2f_num_[hid]; fid++)
      f2h_[fid - kFullSplIdStart] = hid;
  }

  return true;
}

size_t SpellingTrie::get_spelling_num() {
  return spelling_num_;
}

uint8 SpellingTrie::get_ym_id(const char *ym_str) {
  if (NULL == ym_str || NULL == ym_buf_)
    return 0;

  for (uint8 pos = 0; pos < ym_num_; pos++)
    if (strcmp(ym_buf_ + ym_size_ * pos, ym_str) == 0)
      return pos + 1;

  return 0;
}

const char* SpellingTrie::get_spelling_str(uint16 splid) {
  splstr_queried_[0] = '\0';

  if (splid >= kFullSplIdStart) {
    splid -= kFullSplIdStart;
    snprintf(splstr_queried_, spelling_size_, "%s",
             spelling_buf_ + splid * spelling_size_);
  } else {
    if (splid == 'C' - 'A' + 1 + 1) {
      snprintf(splstr_queried_, spelling_size_, "%s", "Ch");
    } else if (splid == 'S' - 'A' + 1 + 2) {
      snprintf(splstr_queried_, spelling_size_, "%s", "Sh");
    } else if (splid == 'Z' - 'A' + 1 + 3) {
      snprintf(splstr_queried_, spelling_size_, "%s", "Zh");
    } else {
      if (splid > 'C' - 'A' + 1)
        splid--;
      if (splid > 'S' - 'A' + 1)
        splid--;
      splstr_queried_[0] = 'A' + splid - 1;
      splstr_queried_[1] = '\0';
    }
  }
  return splstr_queried_;
}

const char16* SpellingTrie::get_spelling_str16(uint16 splid) {
  splstr16_queried_[0] = '\0';

  if (splid >= kFullSplIdStart) {
    splid -= kFullSplIdStart;
    for (size_t pos = 0; pos < spelling_size_; pos++) {
      splstr16_queried_[pos] = static_cast<char16>
          (spelling_buf_[splid * spelling_size_ + pos]);
    }
  } else {
    if (splid == 'C' - 'A' + 1 + 1) {
      splstr16_queried_[0] = static_cast<char16>('C');
      splstr16_queried_[1] = static_cast<char16>('h');
      splstr16_queried_[2] = static_cast<char16>('\0');
    } else if (splid == 'S' - 'A' + 1 + 2) {
      splstr16_queried_[0] = static_cast<char16>('S');
      splstr16_queried_[1] = static_cast<char16>('h');
      splstr16_queried_[2] = static_cast<char16>('\0');
    } else if (splid == 'Z' - 'A' + 1 + 3) {
      splstr16_queried_[0] = static_cast<char16>('Z');
      splstr16_queried_[1] = static_cast<char16>('h');
      splstr16_queried_[2] = static_cast<char16>('\0');
    } else {
      if (splid > 'C' - 'A' + 1)
        splid--;
      if (splid > 'S' - 'A' + 1)
        splid--;
      splstr16_queried_[0] = 'A' + splid - 1;
      splstr16_queried_[1] = '\0';
    }
  }
  return splstr16_queried_;
}

size_t SpellingTrie::get_spelling_str16(uint16 splid, char16 *splstr16,
                                        size_t splstr16_len) {
  if (NULL == splstr16 || splstr16_len < kMaxPinyinSize + 1) return 0;

  if (splid >= kFullSplIdStart) {
    splid -= kFullSplIdStart;
    for (size_t pos = 0; pos <= kMaxPinyinSize; pos++) {
      splstr16[pos] = static_cast<char16>
          (spelling_buf_[splid * spelling_size_ + pos]);
      if (static_cast<char16>('\0') == splstr16[pos]) {
        return pos;
      }
    }
  } else {
    if (splid == 'C' - 'A' + 1 + 1) {
      splstr16[0] = static_cast<char16>('C');
      splstr16[1] = static_cast<char16>('h');
      splstr16[2] = static_cast<char16>('\0');
      return 2;
    } else if (splid == 'S' - 'A' + 1 + 2) {
      splstr16[0] = static_cast<char16>('S');
      splstr16[1] = static_cast<char16>('h');
      splstr16[2] = static_cast<char16>('\0');
      return 2;
    } else if (splid == 'Z' - 'A' + 1 + 3) {
      splstr16[0] = static_cast<char16>('Z');
      splstr16[1] = static_cast<char16>('h');
      splstr16[2] = static_cast<char16>('\0');
      return 2;
    } else {
      if (splid > 'C' - 'A' + 1)
        splid--;
      if (splid > 'S' - 'A' + 1)
        splid--;
      splstr16[0] = 'A' + splid - 1;
      splstr16[1] = '\0';
      return 1;
    }
  }

  // Not reachable.
  return 0;
}

}  // namespace ime_pinyin