/*
 * Decompiled with CFR 0.152.
 */
package net.loomchild.maligna.filter.modifier.modify.clean;

import java.util.List;
import net.loomchild.maligna.filter.modifier.modify.clean.CleanAlgorithm;
import net.loomchild.maligna.filter.modifier.modify.split.SplitAlgorithm;
import net.loomchild.maligna.model.vocabulary.Vocabulary;
import net.loomchild.maligna.model.vocabulary.VocabularyUtil;

/**
 * Clean algorithm that keeps the words known to a {@link Vocabulary} and
 * substitutes a single placeholder word for every word the vocabulary does
 * not contain.
 *
 * <p>The segment is tokenized with the configured {@link SplitAlgorithm};
 * the resulting tokens are written back separated by single spaces.
 */
public class UnifyRareWordsCleanAlgorithm extends CleanAlgorithm {

    /** Placeholder used by the single-argument constructor. */
    public static final String DEFAULT_OTHER_WORD = "{OTHER}";

    /** Vocabulary consulted to decide whether a word is kept as-is. */
    private Vocabulary vocabulary;

    /** Algorithm used to break a segment into words. */
    private SplitAlgorithm splitAlgorithm;

    /** Replacement emitted for each word missing from the vocabulary. */
    private String otherWord;

    /**
     * Creates the algorithm with an explicit tokenizer and placeholder.
     *
     * @param vocabulary     vocabulary whose words are preserved
     * @param splitAlgorithm algorithm splitting a segment into words
     * @param otherWord      text substituted for words not in the vocabulary
     */
    public UnifyRareWordsCleanAlgorithm(Vocabulary vocabulary, SplitAlgorithm splitAlgorithm, String otherWord) {
        this.vocabulary = vocabulary;
        this.splitAlgorithm = splitAlgorithm;
        this.otherWord = otherWord;
    }

    /**
     * Creates the algorithm using
     * {@link VocabularyUtil#DEFAULT_TOKENIZE_ALGORITHM} as the tokenizer and
     * {@link #DEFAULT_OTHER_WORD} as the placeholder.
     *
     * @param vocabulary vocabulary whose words are preserved
     */
    public UnifyRareWordsCleanAlgorithm(Vocabulary vocabulary) {
        this(vocabulary, VocabularyUtil.DEFAULT_TOKENIZE_ALGORITHM, UnifyRareWordsCleanAlgorithm.DEFAULT_OTHER_WORD);
    }

    /**
     * Splits the segment into words and rebuilds it, replacing each word that
     * the vocabulary does not contain with the placeholder word.
     *
     * @param segment text to clean
     * @return the rebuilt segment; an empty string when splitting yields no words
     */
    @Override
    public String clean(String segment) {
        List<String> tokens = this.splitAlgorithm.split(segment);
        StringBuilder unified = new StringBuilder();
        for (String token : tokens) {
            // The separator depends on what has been written so far, not on
            // the token index: nothing is inserted while the output is empty.
            if (unified.length() != 0) {
                unified.append(' ');
            }
            unified.append(this.vocabulary.containsWord(token) ? token : this.otherWord);
        }
        return unified.toString();
    }
}

