/* * Copyright (C) 2008-2012 OMRON SOFTWARE Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package jp.co.omronsoft.openwnn.JAJP; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Arrays; import jp.co.omronsoft.openwnn.CandidateFilter; import jp.co.omronsoft.openwnn.ComposingText; import jp.co.omronsoft.openwnn.OpenWnn; import jp.co.omronsoft.openwnn.OpenWnnDictionaryImpl; import jp.co.omronsoft.openwnn.StrSegmentClause; import jp.co.omronsoft.openwnn.WnnClause; import jp.co.omronsoft.openwnn.WnnDictionary; import jp.co.omronsoft.openwnn.WnnEngine; import jp.co.omronsoft.openwnn.WnnSentence; import jp.co.omronsoft.openwnn.WnnWord; import android.content.SharedPreferences; import android.util.Log; /** * The OpenWnn engine class for Japanese IME. * * @author Copyright (C) 2009-2011 OMRON SOFTWARE CO., LTD. All Rights Reserved. */ public class OpenWnnEngineJAJP implements WnnEngine { /** Current dictionary type */ private int mDictType = DIC_LANG_INIT; /** Dictionary type (default) */ public static final int DIC_LANG_INIT = 0; /** Dictionary type (Japanese standard) */ public static final int DIC_LANG_JP = 0; /** Dictionary type (English standard) */ public static final int DIC_LANG_EN = 1; /** Dictionary type (Japanese person's name) */ public static final int DIC_LANG_JP_PERSON_NAME = 2; /** Dictionary type (User dictionary) */ public static final int DIC_USERDIC = 3; /** Dictionary type (Japanese EISU-KANA conversion) */ public static final int DIC_LANG_JP_EISUKANA = 4; /** Dictionary type (e-mail/URI) */ public static final int DIC_LANG_EN_EMAIL_ADDRESS = 5; /** Dictionary type (Japanese postal address) */ public static final int DIC_LANG_JP_POSTAL_ADDRESS = 6; /** Type of the keyboard */ private int mKeyboardType = KEYBOARD_UNDEF; /** Keyboard type (not defined) */ public static final int KEYBOARD_UNDEF = 0; /** Keyboard type (12-keys) */ public static final int KEYBOARD_KEYPAD12 = 1; /** Keyboard type (Qwerty) */ public static final int KEYBOARD_QWERTY = 2; /** Score(frequency value) of word in the learning dictionary */ public static final int FREQ_LEARN = 600; /** Score(frequency value) of word in the user dictionary */ public static final int FREQ_USER = 500; /** Maximum limit length of output */ public static final int MAX_OUTPUT_LENGTH = 50; /** Limitation of predicted candidates */ public static final int PREDICT_LIMIT = 100; /** Limitation of candidates one-line */ public static final int LIMIT_OF_CANDIDATES_1LINE = 500; /** OpenWnn dictionary */ private WnnDictionary mDictionaryJP; /** Word list */ private ArrayList<WnnWord> mConvResult; /** HashMap for checking duplicate word */ private HashMap<String, WnnWord> mCandTable; /** Input string (Hiragana) */ private String mInputHiragana; /** Input string (Romaji) */ private String mInputRomaji; /** Number of output candidates */ private int mOutputNum; /** * Where to get the next candidates from.<br> * (0:prefix search from the dictionary, 1:single clause converter, 2:Kana converter) */ private int mGetCandidateFrom; /** Previously selected word */ private WnnWord mPreviousWord; /** Converter for single/consecutive clause conversion */ private OpenWnnClauseConverterJAJP mClauseConverter; /** Kana converter (for EISU-KANA conversion) */ private KanaConverter mKanaConverter; /** Whether exact match search or prefix match search */ private boolean mExactMatchMode; /** Whether displaying single clause candidates or not */ private boolean mSingleClauseMode; /** A result of consecutive clause conversion */ private WnnSentence mConvertSentence; /** The candidate filter */ private CandidateFilter mFilter = null; /** * Constructor * * @param writableDictionaryName Writable dictionary file name(null if not use) */ public OpenWnnEngineJAJP(String writableDictionaryName) { /* load Japanese dictionary library */ mDictionaryJP = new OpenWnnDictionaryImpl( "/data/data/jp.co.omronsoft.openwnn/lib/libWnnJpnDic.so", writableDictionaryName ); if (!mDictionaryJP.isActive()) { mDictionaryJP = new OpenWnnDictionaryImpl( "/system/lib/libWnnJpnDic.so", writableDictionaryName ); } /* clear dictionary settings */ mDictionaryJP.clearDictionary(); mDictionaryJP.clearApproxPattern(); mDictionaryJP.setInUseState(false); /* work buffers */ mConvResult = new ArrayList<WnnWord>(); mCandTable = new HashMap<String, WnnWord>(); /* converters */ mClauseConverter = new OpenWnnClauseConverterJAJP(); mKanaConverter = new KanaConverter(); } /** * Set dictionary for prediction. * * @param strlen Length of input string */ private void setDictionaryForPrediction(int strlen) { WnnDictionary dict = mDictionaryJP; dict.clearDictionary(); if (mDictType != DIC_LANG_JP_EISUKANA) { dict.clearApproxPattern(); if (strlen == 0) { dict.setDictionary(2, 245, 245); dict.setDictionary(3, 100, 244); dict.setDictionary(WnnDictionary.INDEX_LEARN_DICTIONARY, FREQ_LEARN, FREQ_LEARN); } else { dict.setDictionary(0, 100, 400); if (strlen > 1) { dict.setDictionary(1, 100, 400); } dict.setDictionary(2, 245, 245); dict.setDictionary(3, 100, 244); dict.setDictionary(WnnDictionary.INDEX_USER_DICTIONARY, FREQ_USER, FREQ_USER); dict.setDictionary(WnnDictionary.INDEX_LEARN_DICTIONARY, FREQ_LEARN, FREQ_LEARN); if (mKeyboardType != KEYBOARD_QWERTY) { dict.setApproxPattern(WnnDictionary.APPROX_PATTERN_JAJP_12KEY_NORMAL); } } } } /** * Get a candidate. * * @param index Index of a candidate. * @return The candidate; {@code null} if there is no candidate. */ private WnnWord getCandidate(int index) { WnnWord word; if (mGetCandidateFrom == 0) { if (mDictType == OpenWnnEngineJAJP.DIC_LANG_JP_EISUKANA) { /* skip to Kana conversion if EISU-KANA conversion mode */ mGetCandidateFrom = 2; } else if (mSingleClauseMode) { /* skip to single clause conversion if single clause conversion mode */ mGetCandidateFrom = 1; } else { if (mConvResult.size() < PREDICT_LIMIT) { /* get prefix matching words from the dictionaries */ while (index >= mConvResult.size()) { if ((word = mDictionaryJP.getNextWord()) == null) { mGetCandidateFrom = 1; break; } if (!mExactMatchMode || mInputHiragana.equals(word.stroke)) { addCandidate(word); if (mConvResult.size() >= PREDICT_LIMIT) { mGetCandidateFrom = 1; break; } } } } else { mGetCandidateFrom = 1; } } } /* get candidates by single clause conversion */ if (mGetCandidateFrom == 1) { Iterator<?> convResult = mClauseConverter.convert(mInputHiragana); if (convResult != null) { while (convResult.hasNext()) { addCandidate((WnnWord)convResult.next()); } } /* end of candidates by single clause conversion */ mGetCandidateFrom = 2; } /* get candidates from Kana converter */ if (mGetCandidateFrom == 2) { List<WnnWord> addCandidateList = mKanaConverter.createPseudoCandidateList(mInputHiragana, mInputRomaji, mKeyboardType); Iterator<WnnWord> it = addCandidateList.iterator(); while(it.hasNext()) { addCandidate(it.next()); } mGetCandidateFrom = 3; } if (index >= mConvResult.size()) { return null; } return (WnnWord)mConvResult.get(index); } /** * Add a candidate to the conversion result buffer. * <br> * This method adds a word to the result buffer if there is not * the same one in the buffer and the length of the candidate * string is not longer than {@code MAX_OUTPUT_LENGTH}. * * @param word A word to be add * @return {@code true} if the word added; {@code false} if not. */ private boolean addCandidate(WnnWord word) { if (word.candidate == null || mCandTable.containsKey(word.candidate) || word.candidate.length() > MAX_OUTPUT_LENGTH) { return false; } if (mFilter != null && !mFilter.isAllowed(word)) { return false; } mCandTable.put(word.candidate, word); mConvResult.add(word); return true; } /** * Clear work area that hold candidates information. */ private void clearCandidates() { mConvResult.clear(); mCandTable.clear(); mOutputNum = 0; mInputHiragana = null; mInputRomaji = null; mGetCandidateFrom = 0; mSingleClauseMode = false; } /** * Set dictionary type. * * @param type Type of dictionary * @return {@code true} if the dictionary is changed; {@code false} if not. */ public boolean setDictionary(int type) { mDictType = type; return true; } /** * Set the search key and the search mode from {@link ComposingText}. * * @param text Input text * @param maxLen Maximum length to convert * @return Length of the search key */ private int setSearchKey(ComposingText text, int maxLen) { String input = text.toString(ComposingText.LAYER1); if (0 <= maxLen && maxLen <= input.length()) { input = input.substring(0, maxLen); mExactMatchMode = true; } else { mExactMatchMode = false; } if (input.length() == 0) { mInputHiragana = ""; mInputRomaji = ""; return 0; } mInputHiragana = input; mInputRomaji = text.toString(ComposingText.LAYER0); return input.length(); } /** * Clear the previous word's information. */ public void clearPreviousWord() { mPreviousWord = null; } /** * Set keyboard type. * * @param keyboardType Type of keyboard */ public void setKeyboardType(int keyboardType) { mKeyboardType = keyboardType; } /** * Set the candidate filter * * @param filter The candidate filter */ public void setFilter(CandidateFilter filter) { mFilter = filter; mClauseConverter.setFilter(filter); } /*********************************************************************** * WnnEngine's interface **********************************************************************/ /** @see jp.co.omronsoft.openwnn.WnnEngine#init */ public void init() { clearPreviousWord(); mClauseConverter.setDictionary(mDictionaryJP); mKanaConverter.setDictionary(mDictionaryJP); } /** @see jp.co.omronsoft.openwnn.WnnEngine#close */ public void close() {} /** @see jp.co.omronsoft.openwnn.WnnEngine#predict */ public int predict(ComposingText text, int minLen, int maxLen) { clearCandidates(); if (text == null) { return 0; } /* set mInputHiragana and mInputRomaji */ int len = setSearchKey(text, maxLen); /* set dictionaries by the length of input */ setDictionaryForPrediction(len); /* search dictionaries */ mDictionaryJP.setInUseState( true ); if (len == 0) { /* search by previously selected word */ return mDictionaryJP.searchWord(WnnDictionary.SEARCH_LINK, WnnDictionary.ORDER_BY_FREQUENCY, mInputHiragana, mPreviousWord); } else { if (mExactMatchMode) { /* exact matching */ mDictionaryJP.searchWord(WnnDictionary.SEARCH_EXACT, WnnDictionary.ORDER_BY_FREQUENCY, mInputHiragana); } else { /* prefix matching */ mDictionaryJP.searchWord(WnnDictionary.SEARCH_PREFIX, WnnDictionary.ORDER_BY_FREQUENCY, mInputHiragana); } return 1; } } /** @see jp.co.omronsoft.openwnn.WnnEngine#convert */ public int convert(ComposingText text) { clearCandidates(); if (text == null) { return 0; } mDictionaryJP.setInUseState( true ); int cursor = text.getCursor(ComposingText.LAYER1); String input; WnnClause head = null; if (cursor > 0) { /* convert previous part from cursor */ input = text.toString(ComposingText.LAYER1, 0, cursor - 1); Iterator headCandidates = mClauseConverter.convert(input); if ((headCandidates == null) || (!headCandidates.hasNext())) { return 0; } head = new WnnClause(input, (WnnWord)headCandidates.next()); /* set the rest of input string */ input = text.toString(ComposingText.LAYER1, cursor, text.size(ComposingText.LAYER1) - 1); } else { /* set whole of input string */ input = text.toString(ComposingText.LAYER1); } WnnSentence sentence = null; if (input.length() != 0) { sentence = mClauseConverter.consecutiveClauseConvert(input); } if (head != null) { sentence = new WnnSentence(head, sentence); } if (sentence == null) { return 0; } StrSegmentClause[] ss = new StrSegmentClause[sentence.elements.size()]; int pos = 0; int idx = 0; Iterator<WnnClause> it = sentence.elements.iterator(); while(it.hasNext()) { WnnClause clause = (WnnClause)it.next(); int len = clause.stroke.length(); ss[idx] = new StrSegmentClause(clause, pos, pos + len - 1); pos += len; idx += 1; } text.setCursor(ComposingText.LAYER2, text.size(ComposingText.LAYER2)); text.replaceStrSegment(ComposingText.LAYER2, ss, text.getCursor(ComposingText.LAYER2)); mConvertSentence = sentence; return 0; } /** @see jp.co.omronsoft.openwnn.WnnEngine#searchWords */ public int searchWords(String key) { clearCandidates(); return 0; } /** @see jp.co.omronsoft.openwnn.WnnEngine#searchWords */ public int searchWords(WnnWord word) { clearCandidates(); return 0; } /** @see jp.co.omronsoft.openwnn.WnnEngine#getNextCandidate */ public WnnWord getNextCandidate() { if (mInputHiragana == null) { return null; } WnnWord word = getCandidate(mOutputNum); if (word != null) { mOutputNum++; } return word; } /** @see jp.co.omronsoft.openwnn.WnnEngine#learn */ public boolean learn(WnnWord word) { int ret = -1; if (word.partOfSpeech.right == 0) { word.partOfSpeech = mDictionaryJP.getPOS(WnnDictionary.POS_TYPE_MEISI); } WnnDictionary dict = mDictionaryJP; if (word instanceof WnnSentence) { Iterator<WnnClause> clauses = ((WnnSentence)word).elements.iterator(); while (clauses.hasNext()) { WnnWord wd = clauses.next(); if (mPreviousWord != null) { ret = dict.learnWord(wd, mPreviousWord); } else { ret = dict.learnWord(wd); } mPreviousWord = wd; if (ret != 0) { break; } } } else { if (mPreviousWord != null) { ret = dict.learnWord(word, mPreviousWord); } else { ret = dict.learnWord(word); } mPreviousWord = word; mClauseConverter.setDictionary(dict); } return (ret == 0); } /** @see jp.co.omronsoft.openwnn.WnnEngine#addWord */ public int addWord(WnnWord word) { mDictionaryJP.setInUseState( true ); if (word.partOfSpeech.right == 0) { word.partOfSpeech = mDictionaryJP.getPOS(WnnDictionary.POS_TYPE_MEISI); } mDictionaryJP.addWordToUserDictionary(word); mDictionaryJP.setInUseState( false ); return 0; } /** @see jp.co.omronsoft.openwnn.WnnEngine#deleteWord */ public boolean deleteWord(WnnWord word) { mDictionaryJP.setInUseState( true ); mDictionaryJP.removeWordFromUserDictionary(word); mDictionaryJP.setInUseState( false ); return false; } /** @see jp.co.omronsoft.openwnn.WnnEngine#setPreferences */ public void setPreferences(SharedPreferences pref) {} /** @see jp.co.omronsoft.openwnn.WnnEngine#breakSequence */ public void breakSequence() { clearPreviousWord(); } /** @see jp.co.omronsoft.openwnn.WnnEngine#makeCandidateListOf */ public int makeCandidateListOf(int clausePosition) { clearCandidates(); if ((mConvertSentence == null) || (mConvertSentence.elements.size() <= clausePosition)) { return 0; } mSingleClauseMode = true; WnnClause clause = mConvertSentence.elements.get(clausePosition); mInputHiragana = clause.stroke; mInputRomaji = clause.candidate; return 1; } /** @see jp.co.omronsoft.openwnn.WnnEngine#initializeDictionary */ public boolean initializeDictionary(int dictionary) { switch( dictionary ) { case WnnEngine.DICTIONARY_TYPE_LEARN: mDictionaryJP.setInUseState( true ); mDictionaryJP.clearLearnDictionary(); mDictionaryJP.setInUseState( false ); return true; case WnnEngine.DICTIONARY_TYPE_USER: mDictionaryJP.setInUseState( true ); mDictionaryJP.clearUserDictionary(); mDictionaryJP.setInUseState( false ); return true; } return false; } /** @see jp.co.omronsoft.openwnn.WnnEngine#initializeDictionary */ public boolean initializeDictionary(int dictionary, int type) { return initializeDictionary(dictionary); } /** @see jp.co.omronsoft.openwnn.WnnEngine#getUserDictionaryWords */ public WnnWord[] getUserDictionaryWords( ) { /* get words in the user dictionary */ mDictionaryJP.setInUseState(true); WnnWord[] result = mDictionaryJP.getUserDictionaryWords( ); mDictionaryJP.setInUseState(false); /* sort the array of words */ Arrays.sort(result, new WnnWordComparator()); return result; } /* {@link WnnWord} comparator for listing up words in the user dictionary */ private class WnnWordComparator implements java.util.Comparator { public int compare(Object object1, Object object2) { WnnWord wnnWord1 = (WnnWord) object1; WnnWord wnnWord2 = (WnnWord) object2; return wnnWord1.stroke.compareTo(wnnWord2.stroke); } } }