package com.samsung.nlepd.bert;

import com.google.common.base.Ascii;
import com.google.common.collect.Iterables;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

/* loaded from: classes2.dex */
/**
 * Basic text tokenizer: cleans the input, splits it on spaces and punctuation, and optionally
 * lower-cases ASCII letters.
 *
 * <p>All character classification (invalid, control, whitespace, punctuation) is delegated to
 * {@code CharChecker}; the exact character sets are defined there, not in this class.
 *
 * <p>Instances hold only a final flag and all helpers are static.
 */
public final class BasicTokenizer {
    /** Message used for every null-input failure raised by this class. */
    private static final String NULL_INPUT_MESSAGE = "The input String is null.";

    /** Whether {@link #tokenize(String)} lower-cases each word before splitting on punctuation. */
    private final boolean doLowerCase;

    /**
     * Creates a tokenizer.
     *
     * @param doLowerCase if {@code true}, {@link #tokenize(String)} lower-cases each word using
     *     {@code Ascii.toLowerCase} before splitting it on punctuation
     */
    public BasicTokenizer(boolean doLowerCase) {
        this.doLowerCase = doLowerCase;
    }

    /**
     * Removes unwanted characters and normalizes whitespace.
     *
     * <p>Characters for which {@code CharChecker.isInvalid} or {@code CharChecker.isControl}
     * returns {@code true} are dropped; characters for which {@code CharChecker.isWhitespace}
     * returns {@code true} are replaced by a single space; everything else is copied unchanged.
     *
     * @param str the text to clean
     * @return the cleaned text
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public static String cleanText(String str) {
        if (str == null) {
            throw new NullPointerException(NULL_INPUT_MESSAGE);
        }
        // The result can never be longer than the input, so presize the buffer.
        StringBuilder cleaned = new StringBuilder(str.length());
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            if (CharChecker.isInvalid(c) || CharChecker.isControl(c)) {
                continue;
            }
            cleaned.append(CharChecker.isWhitespace(c) ? ' ' : c);
        }
        return cleaned.toString();
    }

    /**
     * Splits a string into runs of non-punctuation characters and single punctuation characters.
     *
     * <p>Every character for which {@code CharChecker.isPunctuation} returns {@code true} becomes
     * its own one-character token; each maximal run of other characters becomes one token. The
     * tokens are returned in input order and are never empty. An empty input yields an empty list.
     *
     * @param str the text to split
     * @return a new mutable list of tokens
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public static List<String> runSplitOnPunc(String str) {
        if (str == null) {
            throw new NullPointerException(NULL_INPUT_MESSAGE);
        }
        List<String> tokens = new ArrayList<>();
        // Accumulates the current run of non-punctuation characters.
        StringBuilder word = new StringBuilder();
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            if (CharChecker.isPunctuation(c)) {
                flushWord(word, tokens);
                tokens.add(String.valueOf(c));
            } else {
                word.append(c);
            }
        }
        flushWord(word, tokens);
        return tokens;
    }

    /** Appends the pending word (if any) to {@code tokens} and resets the buffer. */
    private static void flushWord(StringBuilder word, List<String> tokens) {
        if (word.length() > 0) {
            tokens.add(word.toString());
            word.setLength(0);
        }
    }

    /**
     * Splits a string on single space characters using {@link String#split(String)}.
     *
     * <p>Because of {@code String.split} semantics, leading or consecutive spaces produce empty
     * tokens, trailing empty tokens are removed, and an empty input yields a list containing one
     * empty string. The returned list is the fixed-size view created by
     * {@link Arrays#asList(Object[])}.
     *
     * @param str the text to split
     * @return the space-separated pieces of {@code str}
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public static List<String> whitespaceTokenize(String str) {
        if (str == null) {
            throw new NullPointerException(NULL_INPUT_MESSAGE);
        }
        return Arrays.asList(str.split(" "));
    }

    /**
     * Tokenizes text: cleans it with {@link #cleanText(String)}, splits it on spaces, optionally
     * lower-cases each word, and splits each word on punctuation with
     * {@link #runSplitOnPunc(String)}.
     *
     * <p>The result never contains empty tokens; input that produces no tokens (for example an
     * empty or space-only string) yields an empty list.
     *
     * @param str the text to tokenize
     * @return a new mutable list of tokens in input order
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public List<String> tokenize(String str) {
        List<String> tokens = new ArrayList<>();
        for (String word : whitespaceTokenize(cleanText(str))) {
            String normalized = this.doLowerCase ? Ascii.toLowerCase(word) : word;
            // Collect the pieces directly instead of joining them with spaces and re-splitting:
            // splitting an empty joined string would yield a single empty token rather than none.
            // Empty words (from leading/consecutive spaces) contribute no pieces.
            tokens.addAll(runSplitOnPunc(normalized));
        }
        return tokens;
    }
}
