/*
 * Copyright (C) 2008,2009  OMRON SOFTWARE Co., Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package jp.co.omronsoft.openwnn.JAJP;

import jp.co.omronsoft.openwnn.*;
import java.util.*;

/**
 * The penWnn Clause Converter class for Japanese IME.
 * 
 * @author Copyright (C) 2009 OMRON SOFTWARE CO., LTD.  All Rights Reserved.
 */
public class OpenWnnClauseConverterJAJP {
    /** Score(frequency value) of word in the learning dictionary */
    private static final int FREQ_LEARN = 600;
    /** Score(frequency value) of word in the user dictionary */
    private static final int FREQ_USER  = 500;

    /** Maximum limit length of input */
    public static final int MAX_INPUT_LENGTH = 50;

    /** search cache for unique independent words (jiritsugo) */
    private HashMap<String, ArrayList<WnnWord>> mIndepWordBag;
    /** search cache for all independent words (jiritsugo) */
    private HashMap<String, ArrayList<WnnWord>> mAllIndepWordBag;
    /** search cache for ancillary words (fuzokugo) */
    private HashMap<String, ArrayList<WnnWord>> mFzkPatterns;

    /** connect matrix for generating a clause */
    private byte[][] mConnectMatrix;

    /** dictionaries */
    private WnnDictionary mDictionary;

    /** candidates of conversion */
    private LinkedList mConvertResult;

    /** work area for consecutive clause conversion */
    private WnnSentence[] mSentenceBuffer;

    /** part of speech (default) */
    private WnnPOS mPosDefault;
    /** part of speech (end of clause/not end of sentence) */
    private WnnPOS mPosEndOfClause1;
    /** part of speech (end of clause/any place) */
    private WnnPOS mPosEndOfClause2;
    /** part of speech (end of sentence) */
    private WnnPOS mPosEndOfClause3;

    /** cost value of a clause */
    private static final int CLAUSE_COST = -1000;
    
    /** The candidate filter */
    private CandidateFilter mFilter = null;

    /**
     * Constructor
     */
    public OpenWnnClauseConverterJAJP() {
        mIndepWordBag  = new HashMap<String, ArrayList<WnnWord>>();
        mAllIndepWordBag  = new HashMap<String, ArrayList<WnnWord>>();
        mFzkPatterns   = new HashMap();
        mConvertResult = new LinkedList();

        mSentenceBuffer = new WnnSentence[MAX_INPUT_LENGTH];
    }

    /**
     * Set the dictionary
     * 
     * @param dict  The dictionary for phrase conversion
     */
    public void setDictionary(WnnDictionary dict) {
        /* get connect matrix */
        mConnectMatrix = dict.getConnectMatrix();

        /* clear dictionary settings */
        mDictionary = dict;
        dict.clearDictionary();
        dict.clearApproxPattern();                    

        /* clear work areas */
        mIndepWordBag.clear();
        mAllIndepWordBag.clear();
        mFzkPatterns.clear();
        
        /* get part of speech tags */
        mPosDefault      = dict.getPOS(WnnDictionary.POS_TYPE_MEISI);
        mPosEndOfClause1 = dict.getPOS(WnnDictionary.POS_TYPE_V1);
        mPosEndOfClause2 = dict.getPOS(WnnDictionary.POS_TYPE_V2);
        mPosEndOfClause3 = dict.getPOS(WnnDictionary.POS_TYPE_V3);
    }
    
    /**
     * Set the candidate filter
     * 
     * @param filter	The candidate filter
     */
    public void setFilter(CandidateFilter filter) {
    	mFilter = filter;
    }

    /**
     * Kana-to-Kanji conversion (single clause).
     * <br>
     * This method execute single clause conversion.
      *
     * @param input		The input string
     * @return			The candidates of conversion; {@code null} if an error occurs.
     */
     public Iterator convert(String input) {
        /* do nothing if no dictionary is specified */
        if (mConnectMatrix == null || mDictionary == null) {
            return null;
        }
        /* do nothing if the length of input exceeds the limit */
        if (input.length() > MAX_INPUT_LENGTH) {
            return null;
        }

        /* clear the candidates list */
        mConvertResult.clear();

        /* try single clause conversion */
        if (!singleClauseConvert(mConvertResult, input, mPosEndOfClause2, true)) {
            return null;
        }
        return mConvertResult.iterator();
    }

    /**
     * Consecutive clause conversion.
     *
     * @param input		The input string
     * @return			The result of consecutive clause conversion; {@code null} if fail.
     */
    public WnnSentence consecutiveClauseConvert(String input) {
        LinkedList clauses = new LinkedList();

        /* clear the cache which is not matched */
        for (int i = 0; i < input.length(); i++) {
            mSentenceBuffer[i] = null;
        }
        WnnSentence[] sentence = mSentenceBuffer;

        /* consecutive clause conversion */
        for (int start = 0; start < input.length(); start++) {
            if (start != 0 && sentence[start-1] == null) {
                continue;
            }

            /* limit the length of a clause */
            int end = input.length();
            if (end > start + 20) {
                end = start + 20;
            }
            /* make clauses */
            for ( ; end > start; end--) {
                int idx = end - 1;

                /* cutting a branch */
                if (sentence[idx] != null) {
                    if (start != 0) {
                        if (sentence[idx].frequency > sentence[start-1].frequency + CLAUSE_COST + FREQ_LEARN) {
                            /* there may be no way to be the best sequence from the 'start' */
                            break;
                        }
                    } else {
                        if (sentence[idx].frequency > CLAUSE_COST + FREQ_LEARN) {
                            /* there may be no way to be the best sequence from the 'start' */
                            break;
                        }
                    }
                }

                String key = input.substring(start, end);
                clauses.clear();
                WnnClause bestClause = null;
                if (end == input.length()) {
                    /* get the clause which can be the end of the sentence */
                    singleClauseConvert(clauses, key, mPosEndOfClause1, false);
                } else {
                    /* get the clause which is not the end of the sentence */
                    singleClauseConvert(clauses, key, mPosEndOfClause3, false);
                }
                if (clauses.isEmpty()) {
                    bestClause = defaultClause(key);
                } else {
                    bestClause = (WnnClause)clauses.get(0);
                }

                /* make a sub-sentence */
                WnnSentence ws;
                if (start == 0) {
                    ws = new WnnSentence(key, bestClause);
                } else {
                    ws = new WnnSentence(sentence[start-1], bestClause);
                }
                ws.frequency += CLAUSE_COST;

                /* update the best sub-sentence on the cache buffer */
                if (sentence[idx] == null || (sentence[idx].frequency < ws.frequency)) {
                    sentence[idx] = ws;
                }
            }
        }

        /* return the result of the consecutive clause conversion */
        if (sentence[input.length() - 1] != null) {
            return sentence[input.length() - 1];
        }
        return null;
    }

    /**
     * Consecutive clause conversion.
     *
     * @param resultList	Where to store the result
     * @param input			Input string
     * @return				{@code true} if success; {@code false} if fail.
     */
    private boolean consecutiveClauseConvert(LinkedList resultList, String input) {
        WnnSentence sentence = consecutiveClauseConvert(input);

        /* set the result of the consecutive clause conversion on the top of the list */
        if (sentence != null) {
            resultList.add(0, sentence);
            return true;
        }
        return false;
    }

    /**
     * Single clause conversion.
     *
     * @param clauseList	Where to store the results
     * @param input			Input string
     * @param terminal		Part of speech tag at the terminal
     * @param all			Get all candidates or not
     * @return				{@code true} if success; {@code false} if fail.
     */
    private boolean singleClauseConvert(LinkedList clauseList, String input, WnnPOS terminal, boolean all) {
        boolean ret = false;

        /* get clauses without ancillary word */
        ArrayList<WnnWord> stems = getIndependentWords(input, all);
        if (stems != null && (!stems.isEmpty())) {
            Iterator<WnnWord> stemsi = stems.iterator();
            while (stemsi.hasNext()) {
                WnnWord stem = stemsi.next();
                if (addClause(clauseList, input, stem, null, terminal, all)) {
                    ret = true;
                }
            }
        }

        /* get clauses with ancillary word */
        int max = CLAUSE_COST * 2;
        for (int split = 1; split < input.length(); split++) {
            /* get ancillary patterns */
            String str = input.substring(split);
            ArrayList<WnnWord> fzks = getAncillaryPattern(str);
            if (fzks == null || fzks.isEmpty()) {
                continue;
            }
            
            /* get candidates of stem in a clause */
            str = input.substring(0, split);
            stems = getIndependentWords(str, all);
            if (stems == null || stems.isEmpty()) {
                if (mDictionary.searchWord(WnnDictionary.SEARCH_PREFIX, WnnDictionary.ORDER_BY_FREQUENCY, str) <= 0) {
                    break;
                } else {
                    continue;
                }
            }
            /* make clauses */
            Iterator<WnnWord> stemsi = stems.iterator();
            while (stemsi.hasNext()) {
                WnnWord stem = stemsi.next();
                if (all || stem.frequency > max) {
                    Iterator<WnnWord> fzksi  = fzks.iterator();
                    while (fzksi.hasNext()) {
                        WnnWord fzk = fzksi.next();
                        if (addClause(clauseList, input, stem, fzk, terminal, all)) {
                            ret = true;
                            max = stem.frequency;
                        }
                    }
                }
            }
        }
        return ret;
    }

    /**
     * Add valid clause to the candidates list.
     *
     * @param clauseList	Where to store the results
     * @param input			Input string
     * @param stem			Stem of the clause (a independent word)
     * @param fzk			Ancillary pattern
     * @param terminal		Part of speech tag at the terminal
     * @param all			Get all candidates or not
     * @return				{@code true} if add the clause to the list; {@code false} if not.
     */
    private boolean addClause(LinkedList<WnnClause> clauseList, String input, WnnWord stem, WnnWord fzk,
                              WnnPOS terminal, boolean all) {
        WnnClause clause = null;
        /* check if the part of speech is valid */
        if (fzk == null) {
            if (connectible(stem.partOfSpeech.right, terminal.left)) {
                clause = new WnnClause(input, stem);
            }
        } else {
            if (connectible(stem.partOfSpeech.right, fzk.partOfSpeech.left)
                && connectible(fzk.partOfSpeech.right, terminal.left)) {
                clause = new WnnClause(input, stem, fzk);
            }
        }
        if (clause == null) {
            return false;
        }
        if (mFilter != null && !mFilter.isAllowed(clause)) {
        	return false;
        }

        /* store to the list */
        if (clauseList.isEmpty()) {
            /* add if the list is empty */
            clauseList.add(0, clause);
            return true;
        } else {
            if (!all) {
                /* reserve only the best clause */
                WnnClause best = (WnnClause)clauseList.get(0);
                if (best.frequency < clause.frequency) {
                    clauseList.set(0, clause);
                    return true;
                }
            } else {
                /* reserve all clauses */
                Iterator clauseListi = clauseList.iterator();
                int index = 0;
                while (clauseListi.hasNext()) {
                    WnnClause clausei = (WnnClause)clauseListi.next();
                    if (clausei.frequency < clause.frequency) {
                        break;
                    }
                    index++;
                }
                clauseList.add(index, clause);
                return true;
            }
        }

        return false;
    }

    /**
     * Check the part-of-speeches are connectable.
     *
     * @param right		Right attribute of the preceding word/clause
     * @param left		Left attribute of the following word/clause
     * @return			{@code true} if there are connectable; {@code false} if otherwise
     */
    private boolean connectible(int right, int left) {
        try {
            if (mConnectMatrix[left][right] != 0) {
                return true;
            }
        } catch (Exception ex) {
        }
        return false;
    }

    /**
     * Get all exact matched ancillary words(Fuzokugo) list.
     *
     * @param input		Search key
     * @return			List of ancillary words
     */
    private ArrayList<WnnWord> getAncillaryPattern(String input) {
        if (input.length() == 0) {
            return null;
        }

        HashMap<String,ArrayList<WnnWord>> fzkPat = mFzkPatterns;
        ArrayList<WnnWord> fzks = fzkPat.get(input);
        if (fzks != null) {
            return fzks;
        }

        /* set dictionaries */
        WnnDictionary dict = mDictionary;
        dict.clearDictionary();
        dict.clearApproxPattern();                    
        dict.setDictionary(6, 400, 500);

        for (int start = input.length() - 1; start >= 0; start--) {
            String key = input.substring(start);

            fzks = fzkPat.get(key);
            if (fzks != null) {
                continue;
            }

            fzks = new ArrayList<WnnWord>();
            mFzkPatterns.put(key, fzks);

            /* search ancillary words */
            dict.searchWord(WnnDictionary.SEARCH_EXACT, WnnDictionary.ORDER_BY_FREQUENCY, key);
            WnnWord word;
            while ((word = dict.getNextWord()) != null) {
                fzks.add(word);
            }

            /* concatenate sequence of ancillary words */
            for (int end = input.length() - 1; end > start; end--) {
                ArrayList<WnnWord> followFzks = fzkPat.get(input.substring(end));
                if (followFzks == null ||  followFzks.isEmpty()) {
                    continue;
                }
                dict.searchWord(WnnDictionary.SEARCH_EXACT, WnnDictionary.ORDER_BY_FREQUENCY, input.substring(start, end));
                while ((word = dict.getNextWord()) != null) {
                    Iterator<WnnWord> followFzksi = followFzks.iterator();
                    while (followFzksi.hasNext()) {
                        WnnWord follow = followFzksi.next();
                        if (connectible(word.partOfSpeech.right, follow.partOfSpeech.left)) {
                            fzks.add(new WnnWord(key, key, new WnnPOS(word.partOfSpeech.left, follow.partOfSpeech.right)));
                        }
                    }
                }
            }
        }
        return fzks;
    }

    /**
     * Get all exact matched independent words(Jiritsugo) list.
     *
     * @param input    Search key
     * @param all      {@code true} if list all words; {@code false} if list words which has an unique part of speech tag.
     * @return			List of words; {@code null} if {@code input.length() == 0}.
     */
    private ArrayList<WnnWord> getIndependentWords(String input, boolean all) {
        if (input.length() == 0) {
            return null;
        }

        ArrayList<WnnWord> words = (all)? mAllIndepWordBag.get(input) : mIndepWordBag.get(input);
        
        if (words == null) {
            /* set dictionaries */
            WnnDictionary dict = mDictionary;
            dict.clearDictionary();
            dict.clearApproxPattern();                    
            dict.setDictionary(4, 0, 10);   
            dict.setDictionary(5, 400, 500);
            dict.setDictionary(WnnDictionary.INDEX_USER_DICTIONARY, FREQ_USER, FREQ_USER); 
            dict.setDictionary(WnnDictionary.INDEX_LEARN_DICTIONARY, FREQ_LEARN, FREQ_LEARN);

            words = new ArrayList<WnnWord>();
            WnnWord word;
            if (all) {
            	mAllIndepWordBag.put(input, words);
                dict.searchWord(WnnDictionary.SEARCH_EXACT, WnnDictionary.ORDER_BY_FREQUENCY, input);
                /* store all words */
                while ((word = dict.getNextWord()) != null) {
                    if (input.equals(word.stroke)) {
                        words.add(word);
                    }
                }
            } else {
            	mIndepWordBag.put(input, words);
                dict.searchWord(WnnDictionary.SEARCH_EXACT, WnnDictionary.ORDER_BY_FREQUENCY, input);
                /* store a word which has an unique part of speech tag */
                while ((word = dict.getNextWord()) != null) {
                    if (input.equals(word.stroke)) {
                        Iterator<WnnWord> list = words.iterator();
                        boolean found = false;
                        while (list.hasNext()) {
                            WnnWord w = (WnnWord)list.next();
                                if (w.partOfSpeech.right == word.partOfSpeech.right) {
                                    found = true;
                                    break;
                                }
                        }
                        if (!found) {
                            words.add(word);
                        }
                        if (word.frequency < 400) {
                            break;
                        }
                    }
                }
            }
            addAutoGeneratedCandidates(input, words, all);
        }
        return words;
    }
    
    /**
     * Add some words not including in the dictionary.
     * <br>
     * This method adds some words which are not in the dictionary.
     *
     * @param input     Input string
     * @param wordList  List to store words
     * @param all       Get all candidates or not
     */
    private void addAutoGeneratedCandidates(String input, ArrayList wordList, boolean all) {
        wordList.add(new WnnWord(input, input, mPosDefault, (CLAUSE_COST - 1) * input.length()));
    }

    /**
     * Get a default clause.
     * <br>
     * This method generates a clause which has a string same as input
     * and the default part-of-speech tag.
     *
     * @param input    Input string
     * @return			Default clause
     */
    private WnnClause defaultClause(String input) {
        return (new WnnClause(input, input, mPosDefault, (CLAUSE_COST - 1) * input.length()));
    }
}