/*
 * Decompiled with CFR 0.152.
 */
package org.omegat.core.matching.external.lucene;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.StringReader;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.ngram.NGramTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
import org.omegat.core.data.PrepareTMXEntry;
import org.omegat.util.Log;

/**
 * Lucene analyzer that tokenizes text with an {@link NGramTokenizer} configured
 * with minimum and maximum gram size 4, lower-cases the tokens and, when a stop
 * list is present, removes every n-gram contained in {@link #stopNgrams}.
 */
public class NgramAnalyzer extends Analyzer {
    /**
     * N-grams to filter out of the token stream; {@code null} means that no
     * stop filter is installed by {@link #createComponents(String)}.
     */
    public Set<String> stopNgrams = null;

    /**
     * Counts the n-grams produced for the source text of every entry, stores
     * the frequent ones in {@link #stopNgrams} and writes them, one per line
     * in UTF-8, to a file named {@code stop} inside {@code dest}.
     *
     * <p>An n-gram is kept as a stop n-gram when its total number of
     * occurrences is greater than half the number of entries (integer
     * division).
     *
     * @param entries entries whose {@code source} text is analyzed
     * @param dest    directory in which the {@code stop} file is created
     * @throws IOException if tokenizing fails or the file cannot be written
     */
    public void calcstopNgrams(Iterable<PrepareTMXEntry> entries, File dest) throws IOException {
        Map<String, Integer> ngrams = new HashMap<>();
        int count = 0;
        for (PrepareTMXEntry entry : entries) {
            // NOTE(review): tokenStream() uses this analyzer's own components, so a
            // stop list that is already set is presumably applied while counting - confirm.
            // try-with-resources guarantees the stream is closed even when
            // reset() or incrementToken() throws.
            try (TokenStream in = this.tokenStream("", new StringReader(entry.source))) {
                CharTermAttribute termAtt = in.addAttribute(CharTermAttribute.class);
                in.reset();
                while (in.incrementToken()) {
                    String term = termAtt.toString();
                    Integer seen = ngrams.get(term);
                    ngrams.put(term, seen == null ? 1 : seen + 1);
                }
                in.end();
            }
            ++count;
        }

        this.stopNgrams = new HashSet<>();
        int threshold = count / 2;
        for (Map.Entry<String, Integer> ngram : ngrams.entrySet()) {
            if (ngram.getValue() > threshold) {
                this.stopNgrams.add(ngram.getKey());
            }
        }

        File destFile = new File(dest, "stop");
        try (BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(destFile), StandardCharsets.UTF_8))) {
            for (String text : this.stopNgrams) {
                writer.write(text);
                writer.newLine();
            }
        }
    }

    /**
     * Replaces {@link #stopNgrams} with the lines of the given UTF-8 file, one
     * n-gram per line.
     *
     * <p>If the file cannot be opened because it does not exist,
     * {@link #stopNgrams} is left untouched and nothing is logged. Any other
     * failure is passed to {@link Log#log}; n-grams read before the failure
     * remain in the set.
     *
     * @param filename path of the stop n-gram file to read
     */
    public void loadstopNgrams(String filename) {
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(filename), StandardCharsets.UTF_8))) {
            // Only reset the set once the file has been opened successfully.
            this.stopNgrams = new HashSet<>();
            String line;
            while ((line = reader.readLine()) != null) {
                this.stopNgrams.add(line);
            }
        } catch (FileNotFoundException ignored) {
            // Deliberately best-effort: a missing stop file leaves the current list as it is.
        } catch (Exception other) {
            Log.log(other);
        }
    }

    /**
     * Builds the analysis chain: n-gram tokenizer (gram size 4 to 4), then a
     * lower-case filter, then a stop filter when {@link #stopNgrams} is not
     * {@code null}.
     *
     * @param fieldName name of the field being analyzed; not used here
     * @return the tokenizer together with the end of the filter chain
     */
    public Analyzer.TokenStreamComponents createComponents(String fieldName) {
        Tokenizer source = new NGramTokenizer(4, 4);
        // Declared as TokenStream so that either filter can end the chain.
        TokenStream stream = new LowerCaseFilter(source);
        if (this.stopNgrams != null) {
            // The second argument makes the stop set ignore case.
            stream = new StopFilter(stream, new CharArraySet(this.stopNgrams, true));
        }
        return new Analyzer.TokenStreamComponents(source, stream);
    }
}

