/* * Copyright (C) 2008-2012 OMRON SOFTWARE Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "nj_lib.h" #include "nj_err.h" #include "nj_ext.h" #include "nj_dic.h" #include "njd.h" #define DATA_SIZE (10) #define DATA_OFFSET_FHINSI (0) #define DATA_OFFSET_BHINSI (1) #define DATA_OFFSET_HINDO (2) #define DATA_OFFSET_CANDIDATE (3) #define DATA_OFFSET_CANDIDATE_LEN (5) #define DATA_OFFSET_YOMI (6) #define DATA_OFFSET_YOMI_LEN (9) #define YOMINASI_DIC_FREQ_DIV 63 #define DATA_FHINSI(x) \ ( (NJ_UINT16)(0x01FF & \ (((NJ_UINT16)*((x)+DATA_OFFSET_FHINSI ) << 1) | \ ( *((x)+DATA_OFFSET_FHINSI+1) >> 7))) ) #define DATA_BHINSI(x) \ ( (NJ_UINT16)(0x01FF & \ (((NJ_UINT16)*((x)+DATA_OFFSET_BHINSI ) << 2) | \ ( *((x)+DATA_OFFSET_BHINSI+1) >> 6))) ) #define DATA_HINDO(x) \ ((NJ_HINDO)(0x003F & ((NJ_UINT16)*((x)+DATA_OFFSET_HINDO)))) #define DATA_CANDIDATE(x) \ ((NJ_UINT32)(0x000FFFFF & \ (((NJ_UINT32)*((x)+DATA_OFFSET_CANDIDATE) << 12) | \ ((NJ_UINT32)*((x)+DATA_OFFSET_CANDIDATE+1) << 4) | \ ( *((x)+DATA_OFFSET_CANDIDATE+2) >> 4)))) #define DATA_CANDIDATE_SIZE(x) \ ((NJ_UINT8)((*((x)+DATA_OFFSET_CANDIDATE_LEN) << 4) | \ (*((x)+DATA_OFFSET_CANDIDATE_LEN+1) >> 4))) #define DATA_YOMI(x) \ ((NJ_UINT32)(0x000FFFFF & \ (((NJ_UINT32)*((x)+DATA_OFFSET_YOMI) << 16) | \ ((NJ_UINT32)*((x)+DATA_OFFSET_YOMI+1) << 8) | \ ( *((x)+DATA_OFFSET_YOMI+2) )))) #define DATA_YOMI_SIZE(x) \ ((NJ_UINT8)((*((x)+DATA_OFFSET_YOMI_LEN)))) #define YOMI_INDX_TOP_ADDR(h) ((NJ_UINT8*)((h)+NJ_INT32_READ((h)+0x1C))) #define YOMI_INDX_CNT(h) ((NJ_UINT16)(NJ_INT16_READ((h)+0x20))) #define YOMI_INDX_BYTE(h) ((NJ_UINT16)(NJ_INT16_READ((h)+0x22))) #define STEM_AREA_TOP_ADDR(h) ((NJ_UINT8*)((h)+NJ_INT32_READ((h)+0x24))) #define STRS_AREA_TOP_ADDR(h) ((NJ_UINT8*)((h)+NJ_INT32_READ((h)+0x28))) #define YOMI_AREA_TOP_ADDR(h) ((NJ_UINT8*)((h)+NJ_INT32_READ((h)+0x2C))) #define NO_CONV_FLG ((NJ_UINT32) 0x00080000L) #define HINSI_OFFSET (7) #define CURRENT_INFO_SET (NJ_UINT8)(0x10) static NJ_UINT16 search_data(NJ_SEARCH_CONDITION *condition, NJ_SEARCH_LOCATION_SET *loctset); static NJ_UINT16 convert_to_yomi(NJ_DIC_HANDLE hdl, NJ_UINT8 *index, NJ_UINT16 len, NJ_CHAR *yomi, NJ_UINT16 size); static NJ_UINT16 yomi_strcmp_forward(NJ_DIC_HANDLE hdl, NJ_UINT8 *data, NJ_CHAR *yomi); static NJ_UINT16 search_data(NJ_SEARCH_CONDITION *condition, NJ_SEARCH_LOCATION_SET *loctset) { NJ_UINT32 offset; NJ_UINT8 *data; NJ_UINT16 i, j; NJ_UINT16 hindo; NJ_UINT8 hit_flg; NJ_UINT8 *tmp_hinsi = NULL; offset = loctset->loct.current; data = STEM_AREA_TOP_ADDR(loctset->loct.handle) + offset; if (GET_LOCATION_STATUS(loctset->loct.status) != NJ_ST_SEARCH_NO_INIT) { data += DATA_SIZE; offset += DATA_SIZE; if (data >= STRS_AREA_TOP_ADDR(loctset->loct.handle)) { loctset->loct.status = NJ_ST_SEARCH_END; return 0; } } tmp_hinsi = condition->hinsi.fore; condition->hinsi.fore = condition->hinsi.yominasi_fore; i = (STRS_AREA_TOP_ADDR(loctset->loct.handle) - data) / DATA_SIZE; for (j = 0; j < i; j++) { if (njd_connect_test(condition, DATA_FHINSI(data), DATA_BHINSI(data))) { hit_flg = 0; if (condition->operation == NJ_CUR_OP_LINK) { hit_flg = 1; } else { if (yomi_strcmp_forward(loctset->loct.handle, data, condition->yomi)) { hit_flg = 1; } } if (hit_flg) { loctset->loct.current_info = CURRENT_INFO_SET; loctset->loct.current = offset; loctset->loct.status = NJ_ST_SEARCH_READY; hindo = DATA_HINDO(STEM_AREA_TOP_ADDR(loctset->loct.handle) + loctset->loct.current); loctset->cache_freq = CALCULATE_HINDO(hindo, loctset->dic_freq.base, loctset->dic_freq.high, YOMINASI_DIC_FREQ_DIV); condition->hinsi.fore = tmp_hinsi; return 1; } } data += DATA_SIZE; offset += DATA_SIZE; } loctset->loct.status = NJ_ST_SEARCH_END; condition->hinsi.fore = tmp_hinsi; return 0; } static NJ_UINT16 convert_to_yomi(NJ_DIC_HANDLE hdl, NJ_UINT8 *index, NJ_UINT16 len, NJ_CHAR *yomi, NJ_UINT16 size) { NJ_UINT8 *wkc; NJ_CHAR *wky; NJ_UINT16 i, idx, yib, ret; NJ_UINT16 j, char_len; wkc = YOMI_INDX_TOP_ADDR(hdl); yib = YOMI_INDX_BYTE(hdl); if (NJ_CHAR_ILLEGAL_DIC_YINDEX(yib)) { return 0; } ret = 0; wky = yomi; for (i = 0; i < len; i++) { idx = (NJ_UINT16)((*index - 1) * yib); if (yib == 2) { char_len = UTL_CHAR(wkc + idx); if (((ret + char_len + NJ_TERM_LEN) * sizeof(NJ_CHAR)) > size) { return (size / sizeof(NJ_CHAR)); } for (j = 0; j < char_len; j++) { NJ_CHAR_COPY(wky, wkc + idx + j); wky++; ret++; } } else { if (((ret + 1 + NJ_TERM_LEN) * sizeof(NJ_CHAR)) > size) { return (size / sizeof(NJ_CHAR)); } *wky++ = (NJ_CHAR)(*(wkc + idx)); ret++; } index++; } *wky = NJ_CHAR_NUL; return ret; } static NJ_UINT16 yomi_strcmp_forward(NJ_DIC_HANDLE hdl, NJ_UINT8 *data, NJ_CHAR *yomi) { NJ_UINT8 *area; NJ_CHAR *stroke; NJ_CHAR buf[NJ_MAX_LEN + NJ_TERM_LEN]; NJ_UINT16 ylen, dic_ylen, j, size; size = sizeof(buf); stroke = buf; area = YOMI_AREA_TOP_ADDR(hdl) + DATA_YOMI(data); if (YOMI_INDX_CNT(hdl) == 0) { dic_ylen = DATA_YOMI_SIZE(data) / sizeof(NJ_CHAR); if (size < ((dic_ylen + NJ_TERM_LEN) * sizeof(NJ_CHAR))) { return 0; } for (j = 0; j < dic_ylen; j++) { NJ_CHAR_COPY(stroke, area); stroke++; area += sizeof(NJ_CHAR); } *stroke = NJ_CHAR_NUL; } else { dic_ylen = convert_to_yomi(hdl, area, DATA_YOMI_SIZE(data), stroke, size); if (size < ((dic_ylen + NJ_TERM_LEN) * sizeof(NJ_CHAR))) { return 0; } } ylen = nj_strlen(yomi); if (dic_ylen < ylen) { return 0; } if (nj_strncmp(yomi, buf, ylen) == 0) { return 1; } return 0; } NJ_INT16 njd_f_search_word(NJ_SEARCH_CONDITION *con, NJ_SEARCH_LOCATION_SET *loctset) { NJ_UINT16 ret; switch (con->operation) { case NJ_CUR_OP_LINK: if ((con->hinsi.yominasi_fore == NULL) || (con->hinsi.foreSize == 0)) { loctset->loct.status = NJ_ST_SEARCH_END; return 0; } break; case NJ_CUR_OP_FORE: if (NJ_CHAR_STRLEN_IS_0(con->yomi)) { loctset->loct.status = NJ_ST_SEARCH_END; return 0; } if ((con->hinsi.yominasi_fore == NULL) || (con->hinsi.foreSize == 0)) { loctset->loct.status = NJ_ST_SEARCH_END; return 0; } break; default: loctset->loct.status = NJ_ST_SEARCH_END; return 0; } if (con->mode != NJ_CUR_MODE_FREQ) { loctset->loct.status = NJ_ST_SEARCH_END; return 0; } if ((GET_LOCATION_STATUS(loctset->loct.status) == NJ_ST_SEARCH_NO_INIT) || (GET_LOCATION_STATUS(loctset->loct.status) == NJ_ST_SEARCH_READY)) { ret = search_data(con, loctset); if (ret < 1) { loctset->loct.status = NJ_ST_SEARCH_END; } return ret; } else { loctset->loct.status = NJ_ST_SEARCH_END; return 0; } } NJ_INT16 njd_f_get_word(NJ_SEARCH_LOCATION_SET *loctset, NJ_WORD *word) { NJ_UINT8 *data; NJ_CHAR stroke[NJ_MAX_LEN + NJ_TERM_LEN]; NJ_INT16 yomilen, kouholen; if (GET_LOCATION_STATUS(loctset->loct.status) == NJ_ST_SEARCH_END) { return 0; } data = STEM_AREA_TOP_ADDR(loctset->loct.handle) + loctset->loct.current; NJ_SET_YLEN_TO_STEM(word, 1); word->stem.loc = loctset->loct; yomilen = njd_f_get_stroke(word, stroke, sizeof(stroke)); if (yomilen <= 0) { return NJ_SET_ERR_VAL(NJ_FUNC_NJD_F_GET_WORD, NJ_ERR_INVALID_RESULT); } word->stem.info1 = yomilen; word->stem.info1 |= (NJ_UINT16)(DATA_FHINSI(data) << HINSI_OFFSET); word->stem.info2 = (NJ_UINT16)(DATA_BHINSI(data) << HINSI_OFFSET); kouholen = (NJ_UINT16)DATA_CANDIDATE_SIZE(data)/sizeof(NJ_CHAR); if (kouholen == 0) { kouholen = yomilen; } word->stem.info2 |= kouholen; word->stem.hindo = CALCULATE_HINDO(DATA_HINDO(data), loctset->dic_freq.base, loctset->dic_freq.high, YOMINASI_DIC_FREQ_DIV); word->stem.type = 0; return 1; } NJ_INT16 njd_f_get_stroke(NJ_WORD *word, NJ_CHAR *stroke, NJ_UINT16 size) { NJ_SEARCH_LOCATION *loc; NJ_UINT8 *area, *data; NJ_UINT16 len; NJ_UINT32 j; if (NJ_GET_YLEN_FROM_STEM(word) == 0) { return NJ_SET_ERR_VAL(NJ_FUNC_NJD_F_GET_STROKE, NJ_ERR_INVALID_RESULT); } loc = &word->stem.loc; data = STEM_AREA_TOP_ADDR(loc->handle) + loc->current; area = YOMI_AREA_TOP_ADDR(loc->handle) + DATA_YOMI(data); if (YOMI_INDX_CNT(loc->handle) == 0) { len = DATA_YOMI_SIZE(data)/sizeof(NJ_CHAR); if (size < ((len + NJ_TERM_LEN) * sizeof(NJ_CHAR))) { return NJ_SET_ERR_VAL(NJ_FUNC_NJD_F_GET_STROKE, NJ_ERR_BUFFER_NOT_ENOUGH); } for (j = 0; j < len; j++) { NJ_CHAR_COPY(stroke, area); stroke++; area += sizeof(NJ_CHAR); } *stroke = NJ_CHAR_NUL; } else { len = convert_to_yomi(loc->handle, area, DATA_YOMI_SIZE(data), stroke, size); if (size < ((len + NJ_TERM_LEN) * sizeof(NJ_CHAR))) { return NJ_SET_ERR_VAL(NJ_FUNC_NJD_F_GET_STROKE, NJ_ERR_BUFFER_NOT_ENOUGH); } } return len; } NJ_INT16 njd_f_get_candidate(NJ_WORD *word, NJ_CHAR *candidate, NJ_UINT16 size) { NJ_SEARCH_LOCATION *loc; NJ_UINT8 *data, *area; NJ_CHAR work[NJ_MAX_LEN + NJ_TERM_LEN]; NJ_UINT16 len, j; loc = &word->stem.loc; data = STEM_AREA_TOP_ADDR(loc->handle) + loc->current; len = DATA_CANDIDATE_SIZE(data)/sizeof(NJ_CHAR); if (size < ((len + NJ_TERM_LEN) * sizeof(NJ_CHAR))) { return NJ_SET_ERR_VAL(NJ_FUNC_NJD_F_GET_CANDIDATE, NJ_ERR_BUFFER_NOT_ENOUGH); } if (len == 0) { area = YOMI_AREA_TOP_ADDR(loc->handle) + DATA_YOMI(data); if (YOMI_INDX_CNT(loc->handle) == 0) { len = DATA_YOMI_SIZE(data)/sizeof(NJ_CHAR); if (size < ((len + NJ_TERM_LEN) * sizeof(NJ_CHAR))) { return NJ_SET_ERR_VAL(NJ_FUNC_NJD_F_GET_STROKE, NJ_ERR_BUFFER_NOT_ENOUGH); } for (j = 0; j < len; j++) { NJ_CHAR_COPY(candidate + j, area); area += sizeof(NJ_CHAR); } candidate[len] = NJ_CHAR_NUL; return len; } else { len = convert_to_yomi(loc->handle, area, DATA_YOMI_SIZE(data), work, size); if (size < ((len + NJ_TERM_LEN) * sizeof(NJ_CHAR))) { return NJ_SET_ERR_VAL(NJ_FUNC_NJD_F_GET_CANDIDATE, NJ_ERR_BUFFER_NOT_ENOUGH); } } if (DATA_CANDIDATE(data) & NO_CONV_FLG) { nje_convert_hira_to_kata(work, candidate, len); } else { for (j = 0; j < len; j++) { candidate[j] = work[j]; } } } else { area = STRS_AREA_TOP_ADDR(loc->handle) + DATA_CANDIDATE(data); for (j = 0; j < len; j++) { NJ_CHAR_COPY(candidate + j, area); area += sizeof(NJ_CHAR); } } candidate[len] = NJ_CHAR_NUL; return len; }