/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.tokenizers;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.languagetool.tokenizers.Tokenizer;

/**
 * Splits text into sentences using a sequence of regular-expression passes.
 *
 * <p>The algorithm works on a single string and has three phases (see {@link #tokenize(String)}):
 * <ol>
 *   <li>insert the end-of-sentence marker {@link #EOS} after everything that looks like a
 *       sentence or paragraph end,</li>
 *   <li>remove the markers that were inserted after abbreviations, initials, dates and
 *       similar constructs,</li>
 *   <li>re-insert markers at a few places the second phase is known to remove wrongly.</li>
 * </ol>
 * Finally the string is cut at the remaining markers.
 */
public class SentenceTokenizer
implements Tokenizer {
    /** End-of-sentence marker (the NUL character, U+0000) inserted while splitting. */
    protected static final String EOS = "\u0000";
    /** Character class of sentence-ending punctuation: {@code .}, {@code !} and {@code ?}. */
    protected static final String P = "[\\.!?]";
    /**
     * Optional character that may directly follow the punctuation: a quote, a guillemet or a
     * closing bracket.
     *
     * <p>NOTE(review): the group contains an empty alternative (two adjacent {@code |}). This
     * looks like a character that was lost during decompilation or an encoding conversion
     * (possibly a typographic closing quote) - confirm against the original source before
     * changing it. It is kept as-is here so matching behaviour is unchanged.
     */
    protected static final String AP = "(?:'|\u00ab|\"||\\)|\\]|\\})?";
    /** Punctuation followed by the optional "after punctuation" character. */
    protected static final String PAP = P + AP;
    /** Character class of round and square brackets. */
    protected static final String PARENS = "[\\(\\)\\[\\]]";

    // Contents of the "word character" class used by several patterns: \w plus German umlauts
    // and sharp s.
    private static final String WORD_CHARS = "\\w\u00fc\u00f6\u00e4\u00dc\u00d6\u00c4\u00df";
    // Exactly one word character.
    private static final String LETTER = "[" + WORD_CHARS + "]";
    // Exactly one character that is neither a word character nor a hyphen.
    private static final String NOT_LETTER_OR_HYPHEN = "[^-" + WORD_CHARS + "]";

    /** Paragraph pattern currently in use; one of the two paragraph patterns below. */
    private Pattern paragraph;
    // Two line-break characters separated only by whitespace.
    private static final Pattern paragraphByTwoLineBreaks = Pattern.compile("([\\n\\r]\\s*[\\n\\r])");
    // A single line-break character.
    private static final Pattern paragraphByLineBreak = Pattern.compile("([\\n\\r])");

    // --- phase 1: candidate sentence ends -------------------------------------------------
    // Punctuation, optionally followed by a U+0002 character (its meaning is not visible in
    // this file), followed by whitespace.
    private static final Pattern punctWhitespace = Pattern.compile("(" + PAP + "(\u0002)?\\s)");
    // Punctuation directly followed by an uppercase letter and then a character that is
    // neither uppercase nor a dot (i.e. a missing space between sentences).
    private static final Pattern punctUpperLower = Pattern.compile("(" + PAP + ")([\\p{Lu}][^\\p{Lu}.])");
    // Whitespace, a single word character, punctuation.
    private static final Pattern letterPunct = Pattern.compile("(\\s" + LETTER + P + ")");

    // --- phase 2: markers to remove again -------------------------------------------------
    // Single word character + punctuation + whitespace, where the character is not part of a
    // longer or hyphenated word.
    private static final Pattern abbrev1 = Pattern.compile("(" + NOT_LETTER_OR_HYPHEN + LETTER + PAP + "\\s)" + EOS);
    // Same as abbrev1 but with the marker directly after the punctuation.
    private static final Pattern abbrev2 = Pattern.compile("(" + NOT_LETTER_OR_HYPHEN + LETTER + P + ")" + EOS);
    // Whitespace, single word character, dot, whitespace.
    private static final Pattern abbrev3 = Pattern.compile("(\\s" + LETTER + "\\.\\s+)" + EOS);
    // Ellipsis plus space, followed by a lowercase letter.
    private static final Pattern abbrev4 = Pattern.compile("(\\.\\.\\. )" + EOS + "([\\p{Ll}])");
    // A punctuation character enclosed in quotes, e.g. '?' or "!".
    private static final Pattern abbrev5 = Pattern.compile("(['\"]" + P + "['\"]\\s+)" + EOS);
    // Quote, marker, then a lowercase letter: the sentence continues after the quotation.
    private static final Pattern abbrev6 = Pattern.compile("([\"']\\s*)" + EOS + "(\\s*[\\p{Ll}])");
    // Punctuation standing alone between whitespace.
    private static final Pattern abbrev7 = Pattern.compile("(\\s" + PAP + "\\s)" + EOS);
    // Two groups of one or two digits, each followed by a dot, e.g. "3.10. ".
    private static final Pattern abbrev8 = Pattern.compile("(\\d{1,2}\\.\\d{1,2}\\.\\s+)" + EOS);

    // --- phase 3: markers to re-insert ----------------------------------------------------
    // Apostrophe, single word character, punctuation, whitespace.
    private static final Pattern repair1 = Pattern.compile("('" + LETTER + P + ")(\\s)");
    // "no." followed by whitespace that is not followed by a digit.
    private static final Pattern repair2 = Pattern.compile("(\\sno\\.)(\\s+)(?!\\d)");
    // "a.m." / "p.m." followed by whitespace and an uppercase letter.
    private static final Pattern repair3 = Pattern.compile("([ap]\\.m\\.\\s+)([\\p{Lu}])");

    // --- phase 2 (continued): punctuation and brackets ------------------------------------
    // Bracketed run of "!"/"?" such as "(!)" or "[?]", followed by space and marker.
    private static final Pattern repair10 = Pattern.compile("([\\(\\[])([!?]+)([\\]\\)]) " + EOS);
    // Run of "!"/"?" followed by a closing bracket, space and marker.
    private static final Pattern repair11 = Pattern.compile("([!?]+)([\\)\\]]) " + EOS);
    // Any bracket followed by space and marker.
    private static final Pattern repair12 = Pattern.compile("(" + PARENS + ") " + EOS);

    /** Built-in abbreviations (written without the trailing dot) that do not end a sentence. */
    private static final String[] ABBREV_LIST = new String[]{"Mr", "Mrs", "No", "pp", "St", "no", "Sr", "Jr", "Bros", "etc", "vs", "esp", "Fig", "fig", "Jan", "Feb", "Mar", "Apr", "Jun", "Jul", "Aug", "Sep", "Sept", "Oct", "Okt", "Nov", "Dec", "Ph.D", "PhD", "al", "cf", "Inc", "Ms", "Gen", "Sen", "Prof", "Corp", "Co"};

    /**
     * One compiled pattern per abbreviation, in insertion order (caller-supplied first, then
     * built-in). A list is used because {@link Pattern} has identity-based equality, so a hash
     * set would neither de-duplicate nor iterate in a predictable order.
     */
    private final List<Pattern> abbreviationPatterns = new ArrayList<Pattern>();

    /**
     * Optional month names. When non-null, a marker between "digits + dot + space" and one of
     * these names is removed again. Each element is matched as literal text.
     */
    protected String[] monthNames;

    /** Creates a tokenizer that knows only the built-in abbreviations. */
    public SentenceTokenizer() {
        this(new String[0]);
    }

    /**
     * Creates a tokenizer that knows the given abbreviations in addition to the built-in ones.
     * Paragraphs are initially detected by two line breaks.
     *
     * @param abbrevList abbreviations without their trailing dot; each one is matched as
     *                   literal text (regex metacharacters such as {@code .} have no special
     *                   meaning)
     * @throws NullPointerException if {@code abbrevList} or one of its elements is null
     */
    public SentenceTokenizer(String[] abbrevList) {
        this.addAbbreviationPatterns(abbrevList);
        this.addAbbreviationPatterns(ABBREV_LIST);
        this.setSingleLineBreaksMarksParagraph(false);
    }

    /**
     * Compiles one "abbreviation + punctuation + whitespace + marker" pattern per entry and
     * appends it to {@link #abbreviationPatterns}.
     */
    private void addAbbreviationPatterns(String[] abbreviations) {
        for (String abbreviation : abbreviations) {
            // Quote the abbreviation: it is plain text, so e.g. the dot in "Ph.D" must only
            // match a dot and not an arbitrary character.
            String regex = "(\\b" + Pattern.quote(abbreviation) + PAP + "\\s)" + EOS;
            this.abbreviationPatterns.add(Pattern.compile(regex));
        }
    }

    /**
     * Selects how paragraph ends are detected.
     *
     * @param lineBreakParagraphs {@code true} if a single line break ends a paragraph,
     *                            {@code false} if two line breaks (optionally separated by
     *                            whitespace) are required
     */
    public void setSingleLineBreaksMarksParagraph(boolean lineBreakParagraphs) {
        this.paragraph = lineBreakParagraphs ? paragraphByLineBreak : paragraphByTwoLineBreaks;
    }

    /**
     * @return {@code true} if a single line break is currently treated as a paragraph end
     */
    public boolean singleLineBreaksMarksPara() {
        return this.paragraph == paragraphByLineBreak;
    }

    /**
     * Splits the given text into sentences.
     *
     * <p>Markers are inserted after the whitespace that follows the punctuation, so that
     * whitespace stays at the end of the preceding sentence. Empty pieces are not returned,
     * and NUL characters already present in the input act as sentence boundaries because the
     * text is finally cut at every {@link #EOS}.
     *
     * @param s the text to split
     * @return the sentences in order of appearance; empty if {@code s} is empty
     */
    @Override
    public List<String> tokenize(String s) {
        String marked = this.firstSentenceSplitting(s);
        marked = this.removeFalseEndOfSentence(marked);
        marked = this.splitUnsplitStuff(marked);

        List<String> sentences = new ArrayList<String>();
        StringTokenizer pieces = new StringTokenizer(marked, EOS);
        while (pieces.hasMoreTokens()) {
            sentences.add(pieces.nextToken());
        }
        return sentences;
    }

    /**
     * Phase 1: inserts {@link #EOS} after paragraph ends and after everything that looks like
     * the end of a sentence. Deliberately over-generates; false positives are removed later.
     */
    private String firstSentenceSplitting(String s) {
        s = this.paragraph.matcher(s).replaceAll("$1" + EOS);
        s = punctWhitespace.matcher(s).replaceAll("$1" + EOS);
        s = punctUpperLower.matcher(s).replaceAll("$1" + EOS + "$2");
        s = letterPunct.matcher(s).replaceAll("$1" + EOS);
        return s;
    }

    /**
     * Phase 2: removes {@link #EOS} markers that were placed after abbreviations, single
     * letters, ellipses, quoted punctuation, numeric dates, month names and brackets.
     *
     * <p>The passes run in a fixed order because later patterns look at the marker positions
     * left by earlier ones.
     *
     * @param s text containing end-of-sentence markers
     * @return the text with the false markers removed
     */
    protected String removeFalseEndOfSentence(String s) {
        s = abbrev1.matcher(s).replaceAll("$1");
        s = abbrev2.matcher(s).replaceAll("$1");
        s = abbrev3.matcher(s).replaceAll("$1");
        s = abbrev4.matcher(s).replaceAll("$1$2");
        s = abbrev5.matcher(s).replaceAll("$1");
        for (Pattern abbrevPattern : this.abbreviationPatterns) {
            s = abbrevPattern.matcher(s).replaceAll("$1");
        }
        s = abbrev6.matcher(s).replaceAll("$1$2");
        s = abbrev7.matcher(s).replaceAll("$1");
        s = abbrev8.matcher(s).replaceAll("$1");
        if (this.monthNames != null) {
            for (String element : this.monthNames) {
                // Month names are plain text, so quote them before embedding them in a regex.
                s = s.replaceAll("(\\d+\\.) " + EOS + "(" + Pattern.quote(element) + ")", "$1 $2");
            }
        }
        s = repair10.matcher(s).replaceAll("$1$2$3 ");
        s = repair11.matcher(s).replaceAll("$1$2 ");
        s = repair12.matcher(s).replaceAll("$1 ");
        return s;
    }

    /**
     * Phase 3: re-inserts {@link #EOS} at positions where phase 2 removed it although a
     * sentence does end there (after a quoted single letter, after "no." not followed by a
     * number, and after "a.m."/"p.m." followed by an uppercase letter).
     */
    private String splitUnsplitStuff(String s) {
        s = repair1.matcher(s).replaceAll("$1" + EOS + "$2");
        s = repair2.matcher(s).replaceAll("$1" + EOS + "$2");
        s = repair3.matcher(s).replaceAll("$1" + EOS + "$2");
        return s;
    }
}

