/*
 * Decompiled with CFR 0.152.
 */
package org.omegat.tokenizer;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.Collections;
import java.util.Set;
import java.util.regex.Matcher;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ja.JapaneseAnalyzer;
import org.apache.lucene.analysis.ja.JapaneseTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
import org.omegat.tokenizer.BaseTokenizer;
import org.omegat.tokenizer.Tokenizer;
import org.omegat.util.PatternConsts;

/**
 * Tokenizer for Japanese text backed by Lucene's {@code JapaneseAnalyzer} and
 * {@code JapaneseTokenizer}.
 *
 * <p>Two modes are supported by {@link #getTokenStream(String, boolean, boolean)}:
 * <ul>
 * <li>with stemming: tags matching {@code PatternConsts.OMEGAT_TAG} are replaced by
 * spaces and the text is run through a {@code JapaneseAnalyzer};</li>
 * <li>without stemming: the text is run through a bare {@code JapaneseTokenizer}
 * and a {@link TagJoiningFilter} glues tag-like token runs back together.</li>
 * </ul>
 */
@Tokenizer(languages={"ja"}, isDefault=true)
public class LuceneJapaneseTokenizer
extends BaseTokenizer {
    /**
     * Creates the tokenizer and turns off {@code shouldDelegateTokenizeExactly}
     * (a flag inherited from {@code BaseTokenizer}).
     */
    public LuceneJapaneseTokenizer() {
        this.shouldDelegateTokenizeExactly = false;
    }

    /**
     * Builds the Lucene token stream for the given text.
     *
     * @param strOrig          text to tokenize
     * @param stemsAllowed     if {@code true}, use the full {@code JapaneseAnalyzer}
     *                         chain (search mode) on a copy of the text whose tags
     *                         have been blanked out; if {@code false}, use the plain
     *                         {@code JapaneseTokenizer} (normal mode) followed by a
     *                         {@link TagJoiningFilter}
     * @param stopWordsAllowed only consulted when {@code stemsAllowed} is
     *                         {@code true}: selects the analyzer's default stop
     *                         words and stop tags instead of empty sets
     * @return a token stream over {@code strOrig}
     * @throws IOException if the underlying Lucene classes fail to set up the stream
     */
    @Override
    protected TokenStream getTokenStream(String strOrig, boolean stemsAllowed, boolean stopWordsAllowed) throws IOException {
        if (stemsAllowed) {
            // Tags are overwritten with spaces (not removed) so that the offsets of
            // the remaining text stay the same as in the original string.
            String tagless = blankOutTags(strOrig);
            CharArraySet stopWords = stopWordsAllowed
                    ? JapaneseAnalyzer.getDefaultStopSet()
                    : CharArraySet.EMPTY_SET;
            Set<String> stopTags = stopWordsAllowed
                    ? JapaneseAnalyzer.getDefaultStopTags()
                    : Collections.<String>emptySet();
            return new JapaneseAnalyzer(null, JapaneseTokenizer.Mode.SEARCH, stopWords, stopTags)
                    .tokenStream("", new StringReader(tagless));
        }
        // No user dictionary; the boolean is Lucene's discardPunctuation flag, kept
        // false here so that characters such as '<' and '>' are still emitted as
        // tokens for TagJoiningFilter to work with.
        JapaneseTokenizer tokenizer = new JapaneseTokenizer(null, false, JapaneseTokenizer.Mode.NORMAL);
        tokenizer.setReader(new StringReader(strOrig));
        return new TagJoiningFilter(tokenizer);
    }

    /**
     * Returns a copy of {@code text} in which every character covered by a match of
     * {@code PatternConsts.OMEGAT_TAG} is replaced with a single space. The result
     * has the same length as the input.
     *
     * @param text text possibly containing tags
     * @return the text with all tag characters overwritten by spaces
     */
    private String blankOutTags(String text) {
        StringBuilder buffer = new StringBuilder(text);
        Matcher m = PatternConsts.OMEGAT_TAG.matcher(text);
        while (m.find()) {
            int end = m.end();
            for (int i = m.start(); i < end; ++i) {
                buffer.setCharAt(i, ' ');
            }
        }
        return buffer.toString();
    }

    /**
     * Token filter that reassembles tag-like sequences which the upstream
     * tokenizer delivered as several tokens.
     *
     * <p>A candidate starts at a token whose first character is {@code '<'} or
     * {@code '{'}, continues over tokens made up solely of letters, digits and
     * {@code '/'}, and ends at the first token containing the matching {@code '>'}
     * or {@code '}'}. A completed candidate is emitted as one token. If a token
     * containing any other character arrives first, the candidate is abandoned and
     * the tokens consumed so far are emitted individually, as they were received.
     *
     * <p>NOTE(review): when the input ends while a candidate is still open, the text
     * buffered so far is emitted as one joined token rather than as the original
     * separate tokens (see {@link #incrementToken()} / {@link #finishToken()}).
     * Confirm that this is intended.
     */
    private static class TagJoiningFilter
    extends TokenFilter {
        /** Initial capacity of the buffer that accumulates a candidate tag. */
        private static final int BUFFER_INITIAL_SIZE = 5;

        private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
        private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);

        /** Text of the candidate tag collected so far; empty when not buffering. */
        private final StringBuilder buffer = new StringBuilder(BUFFER_INITIAL_SIZE);
        /** Start offset of the token that opened the current candidate tag. */
        private int startOffset = -1;
        /** {@code true} while a candidate tag is being collected. */
        private boolean buffering = false;
        /** Token fragments split off from an input token, to be processed before reading more input. */
        private final ArrayDeque<CachedToken> inputStack = new ArrayDeque<>();
        /** Tokens waiting to be emitted as-is on subsequent {@link #incrementToken()} calls. */
        private final ArrayDeque<CachedToken> outputStack = new ArrayDeque<>();
        /** Copies of the tokens consumed for the current candidate, used if it is abandoned. */
        private final ArrayDeque<CachedToken> recoveryStack = new ArrayDeque<>();

        protected TagJoiningFilter(TokenStream input) {
            super(input);
        }

        /**
         * Advances to the next output token.
         *
         * <p>Pending output tokens are emitted first. Otherwise input tokens are
         * consumed until one can be emitted: either an ordinary token, a completed
         * tag, or the first token of an abandoned candidate. When the input is
         * exhausted, whatever is left in the buffer is emitted as a final token.
         *
         * @return {@code true} if the attributes now describe a token,
         *         {@code false} at end of stream
         * @throws IOException if the wrapped stream fails
         */
        @Override
        public boolean incrementToken() throws IOException {
            if (!outputStack.isEmpty()) {
                replayToken(outputStack.poll());
                return true;
            }
            while (getNextInput()) {
                char[] chars = termAtt.buffer();
                int len = termAtt.length();
                if (buffering) {
                    if (finishBuffering(chars, len)) {
                        return true;
                    }
                    if (cancelBuffering(chars, len)) {
                        return true;
                    }
                    // Token is pure tag content: remember it for recovery and keep collecting.
                    cacheRecoveryToken(chars, len);
                    buffer.append(chars, 0, len);
                    continue;
                }
                if (!startBuffering(chars, len)) {
                    // Ordinary token (possibly truncated just before a tag opener).
                    return true;
                }
            }
            return finishToken();
        }

        /**
         * Resets the wrapped stream and discards any partially collected tag and
         * all cached tokens, so that no state carries over if the stream is reset.
         *
         * @throws IOException if resetting the wrapped stream fails
         */
        @Override
        public void reset() throws IOException {
            super.reset();
            clearBuffer();
            startOffset = -1;
            inputStack.clear();
            outputStack.clear();
            recoveryStack.clear();
        }

        /**
         * Loads the next token to examine into the attributes, preferring cached
         * fragments in {@link #inputStack} over the wrapped stream.
         *
         * @return {@code false} only when both the cache and the wrapped stream are exhausted
         */
        private boolean getNextInput() throws IOException {
            if (!inputStack.isEmpty()) {
                replayToken(inputStack.poll());
                return true;
            }
            return input.incrementToken();
        }

        /**
         * Looks for a tag opener in the current token.
         *
         * <p>If the opener is the first character, the whole token becomes the start
         * of a new candidate and buffering begins. If the opener appears later, the
         * token is split: the part from the opener onward is queued as a new input
         * token and the current token is truncated to the part before it.
         *
         * @param chars term buffer of the current token
         * @param len   number of valid characters in {@code chars}
         * @return {@code true} if buffering was started (the token was consumed);
         *         {@code false} if the current token should be emitted
         */
        private boolean startBuffering(char[] chars, int len) {
            for (int i = 0; i < len; ++i) {
                if (!isTagOpen(chars[i])) {
                    continue;
                }
                if (i > 0) {
                    // Copy the tail before truncating; it is re-examined on the next pass.
                    cacheInputToken(Arrays.copyOfRange(chars, i, len), offsetAtt.startOffset() + i);
                    truncateToken(i);
                    return false;
                }
                buffer.append(chars, 0, len);
                startOffset = offsetAtt.startOffset();
                cacheRecoveryToken(chars, len);
                buffering = true;
                return true;
            }
            return false;
        }

        /** Shortens the current token to its first {@code end} characters and adjusts its end offset. */
        private void truncateToken(int end) {
            termAtt.setLength(end);
            offsetAtt.setOffset(offsetAtt.startOffset(), offsetAtt.startOffset() + end);
        }

        /** Returns whether {@code c} opens a candidate tag ({@code '<'} or <code>'{'</code>). */
        private boolean isTagOpen(char c) {
            return c == '<' || c == '{';
        }

        /**
         * Abandons the current candidate if the token contains a character that
         * cannot be part of a tag. All tokens consumed for the candidate, plus the
         * current one, are queued for output and the first of them is loaded into
         * the attributes.
         *
         * @param chars term buffer of the current token
         * @param len   number of valid characters in {@code chars}
         * @return {@code true} if buffering was cancelled and a token is ready to emit
         */
        private boolean cancelBuffering(char[] chars, int len) {
            for (int i = 0; i < len; ++i) {
                if (isTagContent(chars[i])) {
                    continue;
                }
                // Snapshot the current token before replayToken overwrites the attributes.
                cacheRecoveryToken(chars, len);
                outputStack.addAll(recoveryStack);
                recoveryStack.clear();
                replayToken(outputStack.poll());
                clearBuffer();
                return true;
            }
            return false;
        }

        /** Returns whether {@code c} may appear inside a tag: a letter, a digit or {@code '/'}. */
        private boolean isTagContent(char c) {
            return c == '/' || Character.isLetterOrDigit(c);
        }

        /** Loads a cached token's text and offsets into the attributes. */
        private void replayToken(CachedToken t) {
            termAtt.copyBuffer(t.chars, 0, t.chars.length);
            termAtt.setLength(t.chars.length);
            offsetAtt.setOffset(t.startOffset, t.startOffset + t.chars.length);
        }

        /**
         * Completes the current candidate if the token contains the closer that
         * matches the candidate's opener. Text after the closer, if any, is queued
         * as a new input token.
         *
         * @param chars term buffer of the current token
         * @param len   number of valid characters in {@code chars}
         * @return {@code true} if the joined tag was emitted into the attributes
         */
        private boolean finishBuffering(char[] chars, int len) {
            for (int i = 0; i < len; ++i) {
                if (!isTagClose(chars[i])) {
                    continue;
                }
                if (i < len - 1) {
                    cacheInputToken(Arrays.copyOfRange(chars, i + 1, len), offsetAtt.startOffset() + i + 1);
                }
                buffer.append(chars, 0, i + 1);
                return finishToken();
            }
            return false;
        }

        /**
         * Returns whether {@code c} closes the candidate currently in the buffer.
         * Must only be called while buffering, when the buffer is non-empty.
         */
        private boolean isTagClose(char c) {
            char open = buffer.charAt(0);
            return open == '<' && c == '>' || open == '{' && c == '}';
        }

        /**
         * Emits the buffered text as a single token starting at {@link #startOffset}
         * and leaves buffering mode.
         *
         * @return {@code false} if the buffer was empty (nothing to emit), {@code true} otherwise
         */
        private boolean finishToken() {
            if (buffer.length() == 0) {
                return false;
            }
            String token = buffer.toString();
            termAtt.copyBuffer(token.toCharArray(), 0, token.length());
            termAtt.setLength(token.length());
            offsetAtt.setOffset(startOffset, startOffset + token.length());
            clearBuffer();
            recoveryStack.clear();
            return true;
        }

        /** Empties the candidate buffer and leaves buffering mode. */
        private void clearBuffer() {
            buffer.setLength(0);
            buffering = false;
        }

        /** Queues a token fragment to be examined before further input is read. */
        private void cacheInputToken(char[] chars, int start) {
            inputStack.add(new CachedToken(chars, start));
        }

        /** Saves a copy of the current token so it can be emitted unchanged if the candidate is abandoned. */
        private void cacheRecoveryToken(char[] chars, int len) {
            recoveryStack.add(new CachedToken(Arrays.copyOf(chars, len), offsetAtt.startOffset()));
        }

        /** Immutable snapshot of a token's text and start offset. */
        private static class CachedToken {
            public final char[] chars;
            public final int startOffset;

            CachedToken(char[] chars, int startOffset) {
                this.chars = chars;
                this.startOffset = startOffset;
            }
        }
    }
}

