// Java source  |  614 lines  |  20.5 KB

/*
 * Copyright (C) 2008-2012  OMRON SOFTWARE Co., Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package jp.co.omronsoft.openwnn.JAJP;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Arrays;

import jp.co.omronsoft.openwnn.CandidateFilter;
import jp.co.omronsoft.openwnn.ComposingText;
import jp.co.omronsoft.openwnn.OpenWnn;
import jp.co.omronsoft.openwnn.OpenWnnDictionaryImpl;
import jp.co.omronsoft.openwnn.StrSegmentClause;
import jp.co.omronsoft.openwnn.WnnClause;
import jp.co.omronsoft.openwnn.WnnDictionary;
import jp.co.omronsoft.openwnn.WnnEngine;
import jp.co.omronsoft.openwnn.WnnSentence;
import jp.co.omronsoft.openwnn.WnnWord;
import android.content.SharedPreferences;
import android.util.Log;

/**
 * The OpenWnn engine class for Japanese IME.
 * 
 * @author Copyright (C) 2009-2011 OMRON SOFTWARE CO., LTD.  All Rights Reserved.
 */
public class OpenWnnEngineJAJP implements WnnEngine {
    /** Current dictionary type (one of the {@code DIC_*} values below); written by {@link #setDictionary(int)} */
    private int mDictType = DIC_LANG_INIT;
    /** Dictionary type (default); has the same value as {@link #DIC_LANG_JP} */
    public static final int DIC_LANG_INIT = 0;
    /** Dictionary type (Japanese standard) */
    public static final int DIC_LANG_JP = 0;
    /** Dictionary type (English standard) */
    public static final int DIC_LANG_EN = 1;
    /** Dictionary type (Japanese person's name) */
    public static final int DIC_LANG_JP_PERSON_NAME = 2;
    /** Dictionary type (User dictionary) */
    public static final int DIC_USERDIC = 3;
    /** Dictionary type (Japanese EISU-KANA conversion); in this mode only the Kana converter supplies candidates */
    public static final int DIC_LANG_JP_EISUKANA = 4;
    /** Dictionary type (e-mail/URI) */
    public static final int DIC_LANG_EN_EMAIL_ADDRESS = 5;
    /** Dictionary type (Japanese postal address) */
    public static final int DIC_LANG_JP_POSTAL_ADDRESS = 6;

    /** Type of the keyboard (one of the {@code KEYBOARD_*} values below); written by {@link #setKeyboardType(int)} */
    private int mKeyboardType = KEYBOARD_UNDEF;
    /** Keyboard type (not defined) */
    public static final int KEYBOARD_UNDEF = 0;
    /** Keyboard type (12-keys) */
    public static final int KEYBOARD_KEYPAD12 = 1;
    /** Keyboard type (Qwerty) */
    public static final int KEYBOARD_QWERTY = 2;
    
    /** Score(frequency value) of word in the learning dictionary */
    public static final int FREQ_LEARN = 600;
    /** Score(frequency value) of word in the user dictionary */
    public static final int FREQ_USER = 500;

    /** Maximum limit length of output; longer candidate strings are not added to the result buffer */
    public static final int MAX_OUTPUT_LENGTH = 50;
    /** Limitation of predicted candidates; the dictionary search stage stops once the result buffer reaches this size */
    public static final int PREDICT_LIMIT = 100;

    /** Limitation of candidates one-line (not referenced inside this class) */
    public static final int LIMIT_OF_CANDIDATES_1LINE = 500;

    /** OpenWnn dictionary */
    private WnnDictionary mDictionaryJP;

    /** Word list (candidates collected so far, in output order) */
    private ArrayList<WnnWord> mConvResult;

    /** HashMap for checking duplicate word, keyed by the candidate string */
    private HashMap<String, WnnWord> mCandTable;

    /** Input string (Hiragana); {@code null} while no search is prepared */
    private String mInputHiragana;
    
    /**
     * Input string (Romaji).<br>
     * Note: when a clause is selected for re-conversion this field holds the
     * clause's candidate string instead.
     */
    private String mInputRomaji;
    
    /** Number of output candidates (index of the next candidate to hand out) */
    private int mOutputNum;
    
    /**
     * Where to get the next candidates from.<br>
     * (0:prefix search from the dictionary, 1:single clause converter, 2:Kana converter,
     * 3:every source has been consumed)
     */
    private int mGetCandidateFrom;
    
    /** Previously selected word; used as the preceding word when learning and for link search */
    private WnnWord mPreviousWord;

    /** Converter for single/consecutive clause conversion */
    private OpenWnnClauseConverterJAJP mClauseConverter;

    /** Kana converter (for EISU-KANA conversion) */
    private KanaConverter mKanaConverter;

    /** Whether exact match search or prefix match search ({@code true} means exact match) */
    private boolean mExactMatchMode;

    /** Whether displaying single clause candidates or not */
    private boolean mSingleClauseMode;

    /** A result of consecutive clause conversion */
    private WnnSentence mConvertSentence;
    
    /** The candidate filter ({@code null} means no filtering in this class) */
    private CandidateFilter mFilter = null;

    /**
     * Creates the engine and binds it to the Japanese dictionary library.
     * <br>
     * The library inside the application's own {@code lib} directory is tried
     * first; when the resulting dictionary reports that it is not active, the
     * copy under {@code /system/lib} is used instead.  The dictionary that is
     * kept is then reset (dictionary settings and approximate patterns
     * cleared, in-use state switched off) and the work buffers and converters
     * are allocated.
     *
     * @param writableDictionaryName    Writable dictionary file name(null if not use)
     */
    public OpenWnnEngineJAJP(String writableDictionaryName) {
        /* load Japanese dictionary library: application copy first, system copy as fallback */
        WnnDictionary dictionary = new OpenWnnDictionaryImpl(
                "/data/data/jp.co.omronsoft.openwnn/lib/libWnnJpnDic.so",
                writableDictionaryName);
        if (!dictionary.isActive()) {
            dictionary = new OpenWnnDictionaryImpl(
                    "/system/lib/libWnnJpnDic.so",
                    writableDictionaryName);
        }
        mDictionaryJP = dictionary;

        /* start from a clean dictionary configuration */
        dictionary.clearDictionary();
        dictionary.clearApproxPattern();
        dictionary.setInUseState(false);

        /* buffers that hold the candidates of the current search */
        mConvResult = new ArrayList<WnnWord>();
        mCandTable = new HashMap<String, WnnWord>();

        /* clause converter and Kana converter */
        mClauseConverter = new OpenWnnClauseConverterJAJP();
        mKanaConverter = new KanaConverter();
    }

    /**
     * Configure the dictionary set used for prediction.
     * <br>
     * The dictionary settings are always cleared first.  In EISU-KANA mode
     * nothing else is configured.  Otherwise the approximate pattern is
     * cleared and the dictionaries are registered depending on whether the
     * input is empty:
     * dictionaries 2, 3 and the learning dictionary are always registered;
     * dictionary 0, dictionary 1 (only for input longer than one character),
     * the user dictionary and, for non-Qwerty keyboards, the 12-key
     * approximate pattern are added only when the input is not empty.
     *
     * @param strlen        Length of input string
     */
    private void setDictionaryForPrediction(int strlen) {
        final WnnDictionary dict = mDictionaryJP;
        dict.clearDictionary();

        if (mDictType == DIC_LANG_JP_EISUKANA) {
            /* EISU-KANA conversion does not search the dictionaries */
            return;
        }
        dict.clearApproxPattern();

        final boolean hasInput = (strlen != 0);

        if (hasInput) {
            dict.setDictionary(0, 100, 400);
            if (strlen > 1) {
                dict.setDictionary(1, 100, 400);
            }
        }

        /* registered for both empty and non-empty input */
        dict.setDictionary(2, 245, 245);
        dict.setDictionary(3, 100, 244);

        if (hasInput) {
            dict.setDictionary(WnnDictionary.INDEX_USER_DICTIONARY, FREQ_USER, FREQ_USER);
        }
        dict.setDictionary(WnnDictionary.INDEX_LEARN_DICTIONARY, FREQ_LEARN, FREQ_LEARN);

        if (hasInput && mKeyboardType != KEYBOARD_QWERTY) {
            dict.setApproxPattern(WnnDictionary.APPROX_PATTERN_JAJP_12KEY_NORMAL);
        }
    }

    /**
     * Get a candidate, pulling more candidates into the result buffer on demand.
     * <br>
     * Candidates are produced in stages tracked by {@code mGetCandidateFrom}:
     * stage 0 reads words from the dictionary search, stage 1 adds the
     * results of single clause conversion, stage 2 adds the pseudo candidates
     * of the Kana converter, and stage 3 means nothing is left to add.
     * Stage 0 is skipped in EISU-KANA mode (straight to stage 2) and in
     * single clause mode (straight to stage 1).
     *
     * @param index     Index of a candidate.
     * @return          The candidate; {@code null} if there is no candidate.
     */
    private WnnWord getCandidate(int index) {
        /* stage 0: words found by the dictionary search */
        if (mGetCandidateFrom == 0) {
            if (mDictType == DIC_LANG_JP_EISUKANA) {
                /* EISU-KANA conversion mode uses the Kana converter only */
                mGetCandidateFrom = 2;
            } else if (mSingleClauseMode || mConvResult.size() >= PREDICT_LIMIT) {
                /* single clause mode, or the prediction limit is already reached */
                mGetCandidateFrom = 1;
            } else {
                /* fetch words until the requested index is available */
                while (index >= mConvResult.size()) {
                    WnnWord found = mDictionaryJP.getNextWord();
                    if (found == null) {
                        /* the dictionary search is exhausted */
                        mGetCandidateFrom = 1;
                        break;
                    }
                    if (mExactMatchMode && !mInputHiragana.equals(found.stroke)) {
                        /* exact match mode drops words whose reading differs from the input */
                        continue;
                    }
                    addCandidate(found);
                    if (mConvResult.size() >= PREDICT_LIMIT) {
                        mGetCandidateFrom = 1;
                        break;
                    }
                }
            }
        }

        /* stage 1: candidates by single clause conversion */
        if (mGetCandidateFrom == 1) {
            Iterator<?> clauseCandidates = mClauseConverter.convert(mInputHiragana);
            while (clauseCandidates != null && clauseCandidates.hasNext()) {
                addCandidate((WnnWord) clauseCandidates.next());
            }
            mGetCandidateFrom = 2;
        }

        /* stage 2: pseudo candidates from the Kana converter */
        if (mGetCandidateFrom == 2) {
            List<WnnWord> pseudoCandidates =
                    mKanaConverter.createPseudoCandidateList(mInputHiragana, mInputRomaji, mKeyboardType);
            for (WnnWord pseudo : pseudoCandidates) {
                addCandidate(pseudo);
            }
            mGetCandidateFrom = 3;
        }

        return (index < mConvResult.size()) ? mConvResult.get(index) : null;
    }

    /**
     * Append a word to the conversion result buffer if it is acceptable.
     * <br>
     * A word is rejected when its candidate string is {@code null}, is
     * already registered in the duplicate-check table, is longer than
     * {@code MAX_OUTPUT_LENGTH}, or is refused by the candidate filter
     * (when a filter is set).
     *
     * @param word      A word to be add
     * @return          {@code true} if the word added; {@code false} if not.
     */
    private boolean addCandidate(WnnWord word) {
        final String surface = word.candidate;
        if (surface == null) {
            return false;
        }
        if (mCandTable.containsKey(surface) || surface.length() > MAX_OUTPUT_LENGTH) {
            return false;
        }

        final boolean refusedByFilter = (mFilter != null) && !mFilter.isAllowed(word);
        if (refusedByFilter) {
            return false;
        }

        /* remember the string for duplicate checking and keep the word in output order */
        mCandTable.put(surface, word);
        mConvResult.add(word);
        return true;
    }

    /**
     * Reset every piece of state that belongs to the current candidate list:
     * the result buffer, the duplicate-check table, the output counter, both
     * input strings, the candidate source stage and the single clause flag.
     */
    private void clearCandidates() {
        /* search key */
        mInputHiragana = null;
        mInputRomaji = null;

        /* collected candidates */
        mCandTable.clear();
        mConvResult.clear();

        /* progress of candidate retrieval */
        mGetCandidateFrom = 0;
        mOutputNum = 0;
        mSingleClauseMode = false;
    }

    /**
     * Select the dictionary type used by subsequent searches.
     * <br>
     * The value is stored without validation.
     *
     * @param type      Type of dictionary
     * @return          Always {@code true} in this implementation.
     */
    public boolean setDictionary(int type) {
        this.mDictType = type;
        return true;
    }

    /**
     * Derive the search key and the search mode from a {@link ComposingText}.
     * <br>
     * The key is the LAYER1 string.  When {@code maxLen} lies between 0 and
     * the length of that string (inclusive), the key is cut to
     * {@code maxLen} characters and exact match mode is turned on; otherwise
     * the whole string is used and exact match mode is turned off.  For an
     * empty key both input strings are set to the empty string; otherwise
     * the Romaji input is taken from LAYER0.
     *
     * @param text      Input text
     * @param maxLen    Maximum length to convert
     * @return          Length of the search key
     */
    private int setSearchKey(ComposingText text, int maxLen) {
        String hiragana = text.toString(ComposingText.LAYER1);

        final boolean limited = (maxLen >= 0) && (maxLen <= hiragana.length());
        mExactMatchMode = limited;
        if (limited) {
            hiragana = hiragana.substring(0, maxLen);
        }

        final int keyLength = hiragana.length();
        if (keyLength == 0) {
            mInputHiragana = "";
            mInputRomaji = "";
        } else {
            mInputHiragana = hiragana;
            mInputRomaji = text.toString(ComposingText.LAYER0);
        }
        return keyLength;
    }

    /**
     * Forget the previously selected word, so that the next learning or link
     * search starts without a preceding word.
     */
    public void clearPreviousWord() {
        this.mPreviousWord = null;
    }

    /**
     * Remember the keyboard type.
     * <br>
     * The value is consulted when choosing the approximate pattern for
     * prediction and is passed to the Kana converter.
     *
     * @param keyboardType      Type of keyboard
     */
    public void setKeyboardType(int keyboardType) {
        this.mKeyboardType = keyboardType;
    }

    /**
     * Install the candidate filter for this engine and hand the same filter
     * to the clause converter.
     *
     * @param filter    The candidate filter
     */
    public void setFilter(CandidateFilter filter) {
        this.mFilter = filter;
        this.mClauseConverter.setFilter(filter);
    }
    
    /***********************************************************************
     * WnnEngine's interface
     **********************************************************************/
    /**
     * Forget the previously selected word and give the Japanese dictionary to
     * the clause converter and the Kana converter.
     *
     * @see jp.co.omronsoft.openwnn.WnnEngine#init
     */
    public void init() {
        clearPreviousWord();

        final WnnDictionary dict = mDictionaryJP;
        mClauseConverter.setDictionary(dict);
        mKanaConverter.setDictionary(dict);
    }

    /**
     * Does nothing in this engine.
     *
     * @see jp.co.omronsoft.openwnn.WnnEngine#close
     */
    public void close() {
        /* nothing to do */
    }

    /**
     * Start a prediction search for the given text.
     * <br>
     * The previous candidates are discarded, the search key and dictionaries
     * are set up, and a dictionary search is started.  With an empty key a
     * link search based on the previously selected word is performed and its
     * result is returned.  With a non-empty key an exact or prefix search is
     * started, depending on the match mode, and 1 is returned regardless of
     * the search result.
     * <br>
     * {@code minLen} is not used by this implementation.
     *
     * @see jp.co.omronsoft.openwnn.WnnEngine#predict
     */
    public int predict(ComposingText text, int minLen, int maxLen) {
        clearCandidates();
        if (text == null) {
            return 0;
        }

        /* set mInputHiragana and mInputRomaji, then choose dictionaries by key length */
        final int keyLength = setSearchKey(text, maxLen);
        setDictionaryForPrediction(keyLength);

        mDictionaryJP.setInUseState(true);

        if (keyLength == 0) {
            /* empty key: search by previously selected word */
            return mDictionaryJP.searchWord(WnnDictionary.SEARCH_LINK,
                                            WnnDictionary.ORDER_BY_FREQUENCY,
                                            mInputHiragana, mPreviousWord);
        }

        if (!mExactMatchMode) {
            /* prefix matching */
            mDictionaryJP.searchWord(WnnDictionary.SEARCH_PREFIX,
                                     WnnDictionary.ORDER_BY_FREQUENCY,
                                     mInputHiragana);
        } else {
            /* exact matching */
            mDictionaryJP.searchWord(WnnDictionary.SEARCH_EXACT,
                                     WnnDictionary.ORDER_BY_FREQUENCY,
                                     mInputHiragana);
        }
        return 1;
    }

    /**
     * Run consecutive clause conversion on the composing text.
     * <br>
     * When the LAYER1 cursor is past the start, the part before the cursor is
     * converted as one clause (using the first candidate of single clause
     * conversion) and becomes the head of the sentence; only the remainder is
     * passed to consecutive clause conversion.  The resulting clauses are
     * written to LAYER2 of {@code text} as {@link StrSegmentClause} entries
     * and the sentence is kept for {@link #makeCandidateListOf(int)}.
     * <br>
     * Note: this implementation returns 0 on every path, including after a
     * successful conversion.
     *
     * @see jp.co.omronsoft.openwnn.WnnEngine#convert
     */
    public int convert(ComposingText text) {
        clearCandidates();
        if (text == null) {
            return 0;
        }

        mDictionaryJP.setInUseState(true);

        final int cursor = text.getCursor(ComposingText.LAYER1);
        WnnClause head = null;
        String rest;
        if (cursor <= 0) {
            /* no fixed part: convert the whole input string */
            rest = text.toString(ComposingText.LAYER1);
        } else {
            /* the part before the cursor becomes a single head clause */
            String fixedPart = text.toString(ComposingText.LAYER1, 0, cursor - 1);
            Iterator<?> headCandidates = mClauseConverter.convert(fixedPart);
            if (headCandidates == null || !headCandidates.hasNext()) {
                return 0;
            }
            head = new WnnClause(fixedPart, (WnnWord) headCandidates.next());

            /* the rest of the input string */
            rest = text.toString(ComposingText.LAYER1, cursor, text.size(ComposingText.LAYER1) - 1);
        }

        WnnSentence sentence =
                (rest.length() != 0) ? mClauseConverter.consecutiveClauseConvert(rest) : null;
        if (head != null) {
            sentence = new WnnSentence(head, sentence);
        }
        if (sentence == null) {
            return 0;
        }

        /* map every clause to its inclusive [from, to] range in the reading string */
        StrSegmentClause[] segments = new StrSegmentClause[sentence.elements.size()];
        int from = 0;
        int count = 0;
        for (WnnClause clause : sentence.elements) {
            int next = from + clause.stroke.length();
            segments[count] = new StrSegmentClause(clause, from, next - 1);
            count++;
            from = next;
        }

        text.setCursor(ComposingText.LAYER2, text.size(ComposingText.LAYER2));
        text.replaceStrSegment(ComposingText.LAYER2, segments,
                               text.getCursor(ComposingText.LAYER2));
        mConvertSentence = sentence;

        return 0;
    }
    
    /**
     * Not supported by this engine: the current candidates are discarded and
     * no search is performed.
     *
     * @return  Always 0.
     * @see jp.co.omronsoft.openwnn.WnnEngine#searchWords
     */
    public int searchWords(String key) {
        final int noCandidates = 0;
        clearCandidates();
        return noCandidates;
    }

    /**
     * Not supported by this engine: the current candidates are discarded and
     * no search is performed.
     *
     * @return  Always 0.
     * @see jp.co.omronsoft.openwnn.WnnEngine#searchWords
     */
    public int searchWords(WnnWord word) {
        final int noCandidates = 0;
        clearCandidates();
        return noCandidates;
    }

    /**
     * Return the next candidate of the current search and advance the output
     * counter.
     *
     * @return  The next candidate; {@code null} if no search key is set or
     *          there is no further candidate.
     * @see jp.co.omronsoft.openwnn.WnnEngine#getNextCandidate
     */
    public WnnWord getNextCandidate() {
        WnnWord next = null;
        if (mInputHiragana != null) {
            next = getCandidate(mOutputNum);
            if (next != null) {
                /* only move forward when a candidate was actually delivered */
                mOutputNum++;
            }
        }
        return next;
    }

    /**
     * Learn a selected word, or every clause of a selected sentence.
     * <br>
     * If the right part-of-speech value of {@code word} is 0, its part of
     * speech is replaced by the one the dictionary returns for
     * {@code POS_TYPE_MEISI}.  A {@link WnnSentence} is learned clause by
     * clause, stopping at the first clause that fails; any other word is
     * learned directly and the dictionary is then set on the clause
     * converter again.  Each learned word becomes the new previous word.
     *
     * @return  {@code true} if the last learning call returned 0;
     *          {@code false} otherwise (including a sentence without clauses).
     * @see jp.co.omronsoft.openwnn.WnnEngine#learn
     */
    public boolean learn(WnnWord word) {
        if (word.partOfSpeech.right == 0) {
            word.partOfSpeech = mDictionaryJP.getPOS(WnnDictionary.POS_TYPE_MEISI);
        }

        if (!(word instanceof WnnSentence)) {
            int wordStatus = learnAfterPreviousWord(word);
            mClauseConverter.setDictionary(mDictionaryJP);
            return wordStatus == 0;
        }

        int sentenceStatus = -1;
        for (WnnClause clause : ((WnnSentence) word).elements) {
            sentenceStatus = learnAfterPreviousWord(clause);
            if (sentenceStatus != 0) {
                break;
            }
        }
        return sentenceStatus == 0;
    }

    /**
     * Learn one word, linked to the previously selected word when there is
     * one, and make it the new previous word.
     *
     * @param word      The word to learn
     * @return          The value returned by the dictionary's learning call
     */
    private int learnAfterPreviousWord(WnnWord word) {
        final WnnDictionary dict = mDictionaryJP;
        final int status = (mPreviousWord != null)
                ? dict.learnWord(word, mPreviousWord)
                : dict.learnWord(word);
        mPreviousWord = word;
        return status;
    }

    /**
     * Register a word in the user dictionary.
     * <br>
     * If the right part-of-speech value of {@code word} is 0, its part of
     * speech is replaced by the one the dictionary returns for
     * {@code POS_TYPE_MEISI} before registration.  The dictionary is marked
     * in use for the duration of the call.
     *
     * @return  Always 0; the result of the dictionary call is not reported.
     * @see jp.co.omronsoft.openwnn.WnnEngine#addWord
     */
    public int addWord(WnnWord word) {
        final WnnDictionary dict = mDictionaryJP;

        dict.setInUseState(true);
        if (word.partOfSpeech.right == 0) {
            word.partOfSpeech = dict.getPOS(WnnDictionary.POS_TYPE_MEISI);
        }
        dict.addWordToUserDictionary(word);
        dict.setInUseState(false);

        return 0;
    }

    /**
     * Remove a word from the user dictionary.
     * <br>
     * The dictionary is marked in use for the duration of the call.
     *
     * @return  Always {@code false}; the result of the dictionary call is not
     *          reported, so the return value says nothing about whether the
     *          word was removed.
     * @see jp.co.omronsoft.openwnn.WnnEngine#deleteWord
     */
    public boolean deleteWord(WnnWord word) {
        final WnnDictionary dict = mDictionaryJP;

        dict.setInUseState(true);
        dict.removeWordFromUserDictionary(word);
        dict.setInUseState(false);

        return false;
    }

    /**
     * Does nothing in this engine; the preferences are ignored.
     *
     * @see jp.co.omronsoft.openwnn.WnnEngine#setPreferences
     */
    public void setPreferences(SharedPreferences pref) {
        /* nothing to do */
    }

    /**
     * Break the word sequence by forgetting the previously selected word.
     *
     * @see jp.co.omronsoft.openwnn.WnnEngine#breakSequence
     */
    public void breakSequence() {
        /* same effect as clearPreviousWord() */
        this.clearPreviousWord();
    }

    /**
     * Prepare the candidate list for one clause of the last conversion result.
     * <br>
     * The current candidates are discarded.  If a converted sentence exists
     * and {@code clausePosition} addresses one of its clauses, single clause
     * mode is turned on and the clause's reading becomes the search key, so
     * that {@link #getNextCandidate()} returns candidates for that clause.
     * The clause's candidate string is stored as the Romaji input.
     *
     * @param clausePosition    Index of the clause in the converted sentence
     * @return  1 if the candidate list was prepared; 0 if there is no
     *          converted sentence or {@code clausePosition} is out of range
     *          (negative, or not less than the number of clauses).
     * @see jp.co.omronsoft.openwnn.WnnEngine#makeCandidateListOf
     */
    public int makeCandidateListOf(int clausePosition)  {
        clearCandidates();

        /* reject a missing sentence and any index outside [0, size) — a negative
           index would otherwise reach elements.get() and throw */
        if ((mConvertSentence == null)
                || (clausePosition < 0)
                || (mConvertSentence.elements.size() <= clausePosition)) {
            return 0;
        }
        mSingleClauseMode = true;
        WnnClause clause = mConvertSentence.elements.get(clausePosition);
        mInputHiragana = clause.stroke;
        mInputRomaji = clause.candidate;

        return 1;
    }

    /**
     * Clear the learning dictionary or the user dictionary.
     * <br>
     * The dictionary is marked in use while it is being cleared.
     *
     * @return  {@code true} if {@code dictionary} is
     *          {@code DICTIONARY_TYPE_LEARN} or {@code DICTIONARY_TYPE_USER};
     *          {@code false} for any other value (nothing is done).
     * @see jp.co.omronsoft.openwnn.WnnEngine#initializeDictionary
     */
    public boolean initializeDictionary(int dictionary)  {
        final boolean isLearn = (dictionary == WnnEngine.DICTIONARY_TYPE_LEARN);
        final boolean isUser = (dictionary == WnnEngine.DICTIONARY_TYPE_USER);
        if (!isLearn && !isUser) {
            return false;
        }

        mDictionaryJP.setInUseState(true);
        if (isLearn) {
            mDictionaryJP.clearLearnDictionary();
        } else {
            mDictionaryJP.clearUserDictionary();
        }
        mDictionaryJP.setInUseState(false);
        return true;
    }

    /**
     * Clear a dictionary; {@code type} is ignored and the call is forwarded
     * to {@link #initializeDictionary(int)}.
     *
     * @see jp.co.omronsoft.openwnn.WnnEngine#initializeDictionary
     */
    public boolean initializeDictionary(int dictionary, int type) {
        final boolean initialized = initializeDictionary(dictionary);
        return initialized;
    }
    
    /**
     * Get the words registered in the user dictionary, sorted by reading
     * (the {@code stroke} field).
     * <br>
     * The dictionary is marked in use while the words are read.
     *
     * @return  The sorted words; an empty array if the dictionary returned
     *          {@code null}.
     * @see jp.co.omronsoft.openwnn.WnnEngine#getUserDictionaryWords
     */
    public WnnWord[] getUserDictionaryWords( ) {
        /* get words in the user dictionary */
        mDictionaryJP.setInUseState(true);
        WnnWord[] result = mDictionaryJP.getUserDictionaryWords( );
        mDictionaryJP.setInUseState(false);

        /* Arrays.sort() throws on a null array, so hand back an empty list instead */
        if (result == null) {
            return new WnnWord[0];
        }

        /* sort the array of words */
        Arrays.sort(result, new WnnWordComparator());

        return result;
    }

    /* {@link WnnWord} comparator for listing up words in the user dictionary */
    private class WnnWordComparator implements java.util.Comparator {
        public int compare(Object object1, Object object2) {
            WnnWord wnnWord1 = (WnnWord) object1;
            WnnWord wnnWord2 = (WnnWord) object2;
            return wnnWord1.stroke.compareTo(wnnWord2.stroke);
        }
    }
}