/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.tagging.disambiguation;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import org.languagetool.AnalyzedSentence;
import org.languagetool.AnalyzedToken;
import org.languagetool.AnalyzedTokenReadings;
import org.languagetool.JLanguageTool;
import org.languagetool.tagging.disambiguation.Disambiguator;

/**
 * Disambiguator that marks multi-token phrases listed in a resource file.
 *
 * <p>The resource is read line by line. Blank lines and lines starting with
 * {@code #} are skipped; every other line must hold a phrase and a tag
 * separated by a tab. When the tokens of a sentence spell out one of the
 * phrases, the first token of the match receives an extra reading tagged
 * {@code <TAG>} and the last token of the match an extra reading tagged
 * {@code </TAG>}.
 *
 * <p>The phrase tables are loaded on the first call to
 * {@link #disambiguate(AnalyzedSentence)}. No synchronization is performed by
 * this class.
 */
public class MultiWordChunker
implements Disambiguator {
    /** First word of each phrase containing spaces, mapped to the largest word count among such phrases. */
    private Map<String, Integer> mStartSpace;
    /** First character of each phrase without spaces, mapped to the largest character count among such phrases. */
    private Map<String, Integer> mStartNoSpace;
    /** Complete phrase mapped to its tag, exactly as given in the resource file. */
    private Map<String, String> mFull;
    /** Resource path handed to the data broker when the tables are loaded. */
    private final String filename;

    /**
     * Creates a chunker; the resource is not read until the first sentence is processed.
     *
     * @param filename path of the phrase list, resolved through the resource directory of the data broker
     */
    public MultiWordChunker(String filename) {
        this.filename = filename;
    }

    /**
     * Loads the phrase tables on first use.
     *
     * <p>The tables are built in local maps and published only after the whole
     * resource was parsed, so a failure leaves the fields unset and the next
     * call retries instead of silently working with partial data.
     *
     * @throws IOException if reading the resource fails
     * @throws IllegalStateException if a line does not contain a tab-separated phrase and tag
     */
    private void lazyInit() throws IOException {
        if (this.mStartSpace != null) {
            return;
        }
        Map<String, Integer> startSpace = new HashMap<String, Integer>();
        Map<String, Integer> startNoSpace = new HashMap<String, Integer>();
        Map<String, String> full = new HashMap<String, String>();
        List<String> posTokens = this.loadWords(JLanguageTool.getDataBroker().getFromResourceDirAsStream(this.filename));
        for (String posToken : posTokens) {
            String[] tokenAndTag = posToken.split("\t");
            if (tokenAndTag.length < 2) {
                throw new IllegalStateException("Invalid format in " + this.filename + ": '" + posToken
                        + "', expected a phrase and a tag separated by a tab");
            }
            String phrase = tokenAndTag[0];
            if (phrase.indexOf(' ') > 0) {
                // Phrase made of several space-separated words: index it by its first word.
                String[] words = phrase.split(" ");
                MultiWordChunker.recordLongest(startSpace, words[0], words.length);
            } else {
                // Phrase without spaces: index it by its first character; its length is its character count.
                MultiWordChunker.recordLongest(startNoSpace, phrase.substring(0, 1), phrase.length());
            }
            full.put(phrase, tokenAndTag[1]);
        }
        this.mStartNoSpace = startNoSpace;
        this.mFull = full;
        // Assigned last because a non-null mStartSpace is what marks the tables as loaded.
        this.mStartSpace = startSpace;
    }

    /**
     * Stores {@code length} under {@code key} unless a larger or equal value is already present.
     */
    private static void recordLongest(Map<String, Integer> lengths, String key, int length) {
        Integer known = lengths.get(key);
        if (known == null || known < length) {
            lengths.put(key, length);
        }
    }

    /**
     * Adds opening and closing phrase readings to the tokens of the sentence.
     *
     * <p>The token array obtained from {@code input} is modified in place and
     * wrapped in the returned sentence.
     *
     * @param input the sentence to process
     * @return a new sentence built from the (possibly updated) token array
     * @throws IOException if the phrase resource cannot be read on first use
     */
    @Override
    public final AnalyzedSentence disambiguate(AnalyzedSentence input) throws IOException {
        this.lazyInit();
        AnalyzedTokenReadings[] output = input.getTokens();
        for (int i = 0; i < output.length; ++i) {
            String tok = output[i].getToken();
            if (this.mStartSpace.containsKey(tok)) {
                this.tagSpacedPhrases(output, i, tok, this.mStartSpace.get(tok));
            }
            if (this.mStartNoSpace.containsKey(tok)) {
                this.tagUnspacedPhrases(output, i, tok, this.mStartNoSpace.get(tok));
            }
        }
        return new AnalyzedSentence(output);
    }

    /**
     * Looks for phrases with spaces that begin at {@code start}, joining up to
     * {@code maxWords} non-whitespace tokens with single spaces.
     */
    private void tagSpacedPhrases(AnalyzedTokenReadings[] output, int start, String tok, int maxWords) {
        StringBuilder phrase = new StringBuilder();
        int words = 0;
        for (int j = start; j < output.length; ++j) {
            if (output[j].isWhitespace()) {
                continue;
            }
            phrase.append(output[j].getToken());
            String toks = phrase.toString();
            if (this.mFull.containsKey(toks)) {
                output[start] = this.prepareNewReading(toks, tok, output[start], false);
                // The closing tag belongs on the token that completed the match.
                output[j] = this.prepareNewReading(toks, output[j].getToken(), output[j], true);
            }
            if (++words == maxWords) {
                break;
            }
            phrase.append(' ');
        }
    }

    /**
     * Looks for phrases without spaces that begin at {@code start}, concatenating
     * up to {@code maxTokens} consecutive tokens. The scan is clamped to the end
     * of the sentence so shorter phrases can still match near the end.
     */
    private void tagUnspacedPhrases(AnalyzedTokenReadings[] output, int start, String tok, int maxTokens) {
        // Fresh buffer: text collected by the spaced scan must not leak into this one.
        StringBuilder phrase = new StringBuilder();
        int end = Math.min(start + maxTokens, output.length);
        for (int j = start; j < end; ++j) {
            phrase.append(output[j].getToken());
            String toks = phrase.toString();
            if (!this.mFull.containsKey(toks)) {
                continue;
            }
            output[start] = this.prepareNewReading(toks, tok, output[start], false);
            // The closing tag belongs on the token that completed the match.
            output[j] = this.prepareNewReading(toks, output[j].getToken(), output[j], true);
        }
    }

    /**
     * Builds the phrase reading for one token and attaches it to a copy of that token.
     *
     * @param tokens the matched phrase, used to look up the tag and passed as third argument of the new reading
     * @param tok the surface form of the token receiving the reading
     * @param token the token readings to extend
     * @param isLast {@code true} to produce the closing form {@code </TAG>}, {@code false} for {@code <TAG>}
     * @return a copy of {@code token} with the new reading added
     */
    private AnalyzedTokenReadings prepareNewReading(String tokens, String tok, AnalyzedTokenReadings token, boolean isLast) {
        StringBuilder sb = new StringBuilder();
        sb.append("<");
        if (isLast) {
            sb.append("/");
        }
        sb.append(this.mFull.get(tokens));
        sb.append(">");
        AnalyzedToken tokenStart = new AnalyzedToken(tok, sb.toString(), tokens);
        return this.setAndAnnotate(token, tokenStart);
    }

    /**
     * Copies {@code oldReading}, adds {@code newReading} to the copy and extends
     * the historical annotations with a before/after record of the change.
     */
    private AnalyzedTokenReadings setAndAnnotate(AnalyzedTokenReadings oldReading, AnalyzedToken newReading) {
        String old = oldReading.toString();
        String prevAnot = oldReading.getHistoricalAnnotations();
        AnalyzedTokenReadings newAtr = new AnalyzedTokenReadings(oldReading.getReadings(), oldReading.getStartPos());
        newAtr.setWhitespaceBefore(oldReading.isWhitespaceBefore());
        newAtr.addReading(newReading);
        newAtr.setHistoricalAnnotations(this.annotateToken(prevAnot, old, newAtr.toString()));
        return newAtr;
    }

    /**
     * Appends a {@code MULTIWORD_CHUNKER} entry describing the change from
     * {@code oldReading} to {@code newReading} to the previous annotations.
     */
    private String annotateToken(String prevAnot, String oldReading, String newReading) {
        StringBuilder sb = new StringBuilder();
        sb.append(prevAnot);
        sb.append("\nMULTIWORD_CHUNKER: ");
        sb.append(oldReading);
        sb.append(" -> ");
        sb.append(newReading);
        return sb.toString();
    }

    /**
     * Reads the stream as UTF-8 and returns its trimmed lines, skipping empty
     * lines and lines whose first character is {@code #}. The stream is closed
     * before returning.
     *
     * @param stream the resource to read
     * @return the remaining lines in file order
     * @throws IOException if the underlying stream failed while being read
     */
    private List<String> loadWords(InputStream stream) throws IOException {
        List<String> lines = new ArrayList<String>();
        Scanner scanner = new Scanner(stream, "UTF-8");
        try {
            while (scanner.hasNextLine()) {
                String line = scanner.nextLine().trim();
                if (line.length() < 1 || line.charAt(0) == '#') {
                    continue;
                }
                lines.add(line);
            }
            // Scanner treats a read failure as end of input; surface it instead of returning a truncated list.
            IOException readFailure = scanner.ioException();
            if (readFailure != null) {
                throw readFailure;
            }
        }
        finally {
            scanner.close();
        }
        return lines;
    }
}

