package com.sixfive.can.nl.lexical.en;

import com.google.common.base.CharMatcher;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.ibm.icu.text.DateFormat;
import com.ibm.icu.text.PluralRules;
import com.ibm.icu.util.ULocale;
import com.sixfive.can.nl.lexical.GreedyICUTokenizer;
import com.sixfive.can.nl.lexical.en.EnglishTokenizer;
import e0.c3;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

/* loaded from: classes2.dex */
public class EnglishTokenizer extends GreedyICUTokenizer {
    /** Current tokenizer version; equal to the value returned by {@link #version()}. */
    private static final int CURRENT_VERSION = 1;

    /** Language code the constructor requires of the supplied locale. */
    private static final String ENGLISH_LANGUAGE_TAG = "en";

    private static final long serialVersionUID = 8833176735261391940L;

    /**
     * Case-insensitive alternation of {@link #ALWAYS_BREAK_PATTERNS}, compiled in the constructor.
     * A token that fully matches it is split at its capture-group boundaries by {@code adjust}.
     */
    private final Pattern ALWAYS_BREAK;

    /** Regex sources joined with {@code |} to build {@link #ALWAYS_BREAK}. */
    private final String[] ALWAYS_BREAK_PATTERNS;

    /** Characters that make a token insignificant when the token consists solely of them. */
    private static final CharMatcher INSIGNIFICANT_PUNCTUATION = CharMatcher.anyOf("!?,.-/:&(){}[]");

    /** The possessive suffix letter, in either case. */
    private static final CharMatcher POSSESSIVE = CharMatcher.anyOf("sS");

    /** Straight apostrophe and right single quotation mark. */
    private static final CharMatcher APOSTROPHE = CharMatcher.anyOf("'’");

    /**
     * Known contractions as (left word, right word, abbreviation after the apostrophe).
     * The literal "d" abbreviations were rendered by the decompiler as {@code DateFormat.DAY};
     * that constant has the same value but no semantic relation to contractions.
     */
    private static final ContractionSet CONTRACTIONS = new ContractionSet(
            Contraction.of("what", "is", "s"),
            Contraction.of("when", "is", "s"),
            Contraction.of("where", "is", "s"),
            Contraction.of("who", "is", "s"),
            Contraction.of("why", "is", "s"),
            Contraction.of("how", "is", "s"),
            Contraction.of("i", "am", "m"),
            Contraction.of("i", "would", "d"),
            Contraction.of("you", "are", "re"),
            Contraction.of("you", "would", "d"),
            Contraction.of("it", "is", "s"),
            Contraction.of("it", "would", "d"),
            Contraction.of("let", "us", "s"));

    /** Shared normalizer instance handed to the superclass constructor. */
    private static final Normalizer NORMALIZER = new Normalizer(0);

    /** Wake-word token sequences handed to the superclass constructor. */
    private static final List<List<String>> WAKEWORDS = ImmutableList.of(ImmutableList.of("Hi", "Bixby"));

    /**
     * English quantity phrases mapped to the integer value assigned to each of them.
     * The explicit type witness is required: without it the chained builder is inferred as
     * {@code ImmutableMap<Object, Object>} and the assignment does not compile.
     */
    public static final Map<String, Integer> ENGLISH_NUMERIC_PHRASES = ImmutableMap.<String, Integer>builder()
            .put("a", 1)
            .put("an", 1)
            .put("couple", 2)
            .put("a couple", 2)
            .put("a couple of", 2)
            .put("few", 3)
            .put("a few", 3)
            .put("a few of", 3)
            .put("several", 5)
            .put("several of", 5)
            .build();

    /* loaded from: classes2.dex */
    /**
     * Immutable description of one contraction: the two full words, the abbreviation that
     * follows the apostrophe, and two strings derived from them.
     */
    public static class Contraction implements Serializable {
        private static final long serialVersionUID = -8151710728247345020L;

        /** First word of the expanded form. */
        final String leftWord;
        /** Second word of the expanded form. */
        final String rightWord;
        /** Text that follows the apostrophe in the contracted form. */
        final String abbrev;
        /** Expanded form: left word, a single space, right word. */
        final String phrase;
        /**
         * Result of {@code c3.k(leftWord, abbrev)}.
         * NOTE(review): {@code c3.k} is an obfuscated helper; presumably it concatenates its
         * two arguments - confirm against its definition.
         */
        final String canonical;

        private Contraction(String left, String right, String suffix) {
            this.leftWord = left;
            this.rightWord = right;
            this.abbrev = suffix;
            this.canonical = c3.k(left, suffix);
            this.phrase = left + " " + right;
        }

        /**
         * Creates a contraction.
         *
         * @param str  left word of the expanded form
         * @param str2 right word of the expanded form
         * @param str3 abbreviation following the apostrophe
         * @return a new {@code Contraction}
         */
        public static Contraction of(String str, String str2, String str3) {
            return new Contraction(str, str2, str3);
        }
    }

    /* loaded from: classes2.dex */
    /**
     * A fixed collection of {@link Contraction}s with pre-computed lookup sets so the common
     * "not a contraction" case is rejected with a hash lookup before the array is scanned.
     */
    public static class ContractionSet implements Serializable {
        private static final long serialVersionUID = 8828724611623722202L;
        private final Set<String> abbrevs;
        private final Contraction[] contractions;
        private final Set<String> leftWords;
        private final Set<String> phrases;
        private final Set<String> rightWords;

        /**
         * Builds the set and its lookup indexes.
         *
         * @param contractionArr the contractions to index; the array is stored as given
         */
        public ContractionSet(Contraction... contractionArr) {
            this.contractions = contractionArr;
            this.leftWords = collect(contractionArr, c -> c.leftWord);
            this.rightWords = collect(contractionArr, c -> c.rightWord);
            this.abbrevs = collect(contractionArr, c -> c.abbrev);
            this.phrases = collect(contractionArr, c -> c.phrase);
        }

        /** Gathers one string attribute of every contraction into a set. */
        private static Set<String> collect(Contraction[] source, Function<Contraction, String> attribute) {
            return Arrays.stream(source).map(attribute).collect(Collectors.toSet());
        }

        /**
         * Looks up the canonical form of an expanded phrase.
         *
         * @param str candidate phrase, compared exactly (case-sensitively) with each
         *            contraction's {@code phrase}
         * @return the matching contraction's {@code canonical} string, or {@code null} if
         *         {@code str} is not a known phrase
         */
        public String canonicalize(String str) {
            if (!this.phrases.contains(str)) {
                return null;
            }
            for (Contraction contraction : this.contractions) {
                if (str.equals(contraction.phrase)) {
                    return contraction.canonical;
                }
            }
            return null;
        }

        /**
         * Checks whether the tail of the utterance is the expanded form of a known contraction.
         * NOTE(review): assumes {@code last(n)} returns the token {@code n} positions before the
         * final one - confirm against {@code WorkingUtterance}.
         *
         * @param workingUtterance the utterance being tokenized
         * @return 2 when {@code last(2)} and {@code last()} form a known left/right pair and
         *         {@code last(1)} is all whitespace; 0 otherwise
         */
        public int contractionSize(GreedyICUTokenizer.WorkingUtterance workingUtterance) {
            if (workingUtterance.size() < 3 || !GreedyICUTokenizer.WHITESPACE.matchesAllOf(workingUtterance.last(1))) {
                return 0;
            }
            // Locale.ROOT: the word lists are lower-case ASCII, and default-locale lowercasing
            // maps 'I' to dotless 'ı' under Turkish/Azeri, which would never match "i" or "it".
            String left = workingUtterance.last(2).toLowerCase(Locale.ROOT);
            if (!this.leftWords.contains(left)) {
                return 0;
            }
            String right = workingUtterance.last().toLowerCase(Locale.ROOT);
            if (!this.rightWords.contains(right)) {
                return 0;
            }
            // Both words are known individually; make sure they belong to the same contraction.
            for (Contraction contraction : this.contractions) {
                if (left.equals(contraction.leftWord) && right.equals(contraction.rightWord)) {
                    return 2;
                }
            }
            return 0;
        }

        /**
         * Tells whether a token is the contracted form (left word, apostrophe, abbreviation) of
         * a known contraction. The token is split at its first apostrophe character.
         *
         * @param str the token to inspect
         * @return {@code true} if the part before the first apostrophe and the part after it
         *         match the same contraction, ignoring case; {@code false} otherwise, including
         *         when there is no apostrophe or it is the last character
         */
        public boolean shouldRetain(String str) {
            int apostropheAt = EnglishTokenizer.APOSTROPHE.indexIn(str);
            if (apostropheAt < 0 || apostropheAt == str.length() - 1) {
                return false;
            }
            // Locale.ROOT for the same reason as in contractionSize: "I'm" must lowercase to "i".
            String left = str.substring(0, apostropheAt).toLowerCase(Locale.ROOT);
            if (!this.leftWords.contains(left)) {
                return false;
            }
            String suffix = str.substring(apostropheAt + 1).toLowerCase(Locale.ROOT);
            if (!this.abbrevs.contains(suffix)) {
                return false;
            }
            for (Contraction contraction : this.contractions) {
                if (left.equals(contraction.leftWord) && suffix.equals(contraction.abbrev)) {
                    return true;
                }
            }
            return false;
        }
    }

    /* loaded from: classes2.dex */
    /** Stateless English normalizer plugged into {@link GreedyICUTokenizer}. */
    public static class Normalizer implements GreedyICUTokenizer.Normalizer, Serializable {
        private static final long serialVersionUID = 6866706887647851544L;

        private Normalizer() {
        }

        /**
         * Compiler-generated accessor for the private constructor.
         *
         * @param i7 ignored
         */
        public /* synthetic */ Normalizer(int i7) {
            this();
        }

        /**
         * @param str the token to test
         * @return {@code false} when every character of {@code str} is insignificant
         *         punctuation, {@code true} otherwise
         */
        @Override
        public boolean isSignificant(String str) {
            boolean onlyPunctuation = EnglishTokenizer.INSIGNIFICANT_PUNCTUATION.matchesAllOf(str);
            return !onlyPunctuation;
        }

        /**
         * Normalizes a token.
         *
         * @param str the token to normalize
         * @return the canonical form if {@code str} is a known expanded contraction phrase;
         *         otherwise {@code str} reduced to its letters and digits if it contains at
         *         least one letter; otherwise {@code str} unchanged
         */
        @Override
        public String normalize(String str) {
            String canonical = EnglishTokenizer.CONTRACTIONS.canonicalize(str);
            if (canonical != null) {
                return canonical;
            }
            if (!CharMatcher.javaLetter().matchesAnyOf(str)) {
                // No letters at all (e.g. digits or pure punctuation): leave untouched.
                return str;
            }
            return CharMatcher.javaLetterOrDigit().retainFrom(str);
        }
    }

    /**
     * Creates a tokenizer for an English locale and compiles the always-break pattern.
     *
     * @param uLocale the locale to tokenize for; its language code must be {@code "en"}
     * @throws IllegalArgumentException if the locale's language is not {@code "en"}
     */
    public EnglishTokenizer(ULocale uLocale) {
        super(uLocale, NORMALIZER, WAKEWORDS);
        // Pattern 1: anything followed by a possessive 's. Pattern 2: digits followed by
        // non-word characters or an am/pm marker.
        this.ALWAYS_BREAK_PATTERNS = new String[] {"(.+)(['’]s)", "(\\d+)(\\W+|[ap]m)"};
        String alternation = Joiner.on('|').join(this.ALWAYS_BREAK_PATTERNS);
        this.ALWAYS_BREAK = Pattern.compile(alternation, Pattern.CASE_INSENSITIVE);
        Preconditions.checkArgument(uLocale.getLanguage().equals(ENGLISH_LANGUAGE_TAG));
    }

    /** Returns {@code true} if {@code str} is exactly one character and that character is an apostrophe. */
    private static boolean isApostrophe(String str) {
        if (str.length() != 1) {
            return false;
        }
        return APOSTROPHE.matches(str.charAt(0));
    }

    /** Returns {@code true} if {@code str} is exactly one character and that character is {@code s} or {@code S}. */
    private static boolean isPossessive(String str) {
        if (str.length() != 1) {
            return false;
        }
        return POSSESSIVE.matches(str.charAt(0));
    }

    /**
     * Detects a possessive suffix at the end of the utterance.
     * NOTE(review): assumes {@code last(n)} returns the token {@code n} positions before the
     * final one - confirm against {@code WorkingUtterance}.
     *
     * @param workingUtterance the utterance being tokenized
     * @return 1 if the final token is {@code s}/{@code S} directly preceded by an apostrophe
     *         token; 2 if a whitespace token sits between the apostrophe and the {@code s};
     *         0 otherwise
     */
    private static int possessiveSize(GreedyICUTokenizer.WorkingUtterance workingUtterance) {
        int tokenCount = workingUtterance.size();
        if (tokenCount == 1 || !isPossessive(workingUtterance.last())) {
            return 0;
        }
        String previous = workingUtterance.last(1);
        if (isApostrophe(previous)) {
            return 1;
        }
        boolean spacedApostrophe = tokenCount > 2
                && GreedyICUTokenizer.WHITESPACE.matchesAllOf(previous)
                && isApostrophe(workingUtterance.last(2));
        return spacedApostrophe ? 2 : 0;
    }

    /**
     * Post-processes the most recent token of the utterance. In order:
     * <ol>
     *   <li>a known contracted form (e.g. left word + apostrophe + abbreviation) is left as is;</li>
     *   <li>a token fully matching the always-break pattern is split at the start offset of
     *       every capture group that begins after position 0;</li>
     *   <li>otherwise trailing tokens forming an expanded contraction, or failing that a
     *       possessive suffix, are merged.</li>
     * </ol>
     *
     * @param workingUtterance the utterance being tokenized; modified in place
     */
    @Override
    public void adjust(GreedyICUTokenizer.WorkingUtterance workingUtterance) {
        String token = workingUtterance.last();
        if (CONTRACTIONS.shouldRetain(token)) {
            return;
        }

        Matcher breakMatcher = this.ALWAYS_BREAK.matcher(token);
        if (breakMatcher.matches()) {
            ArrayList<Integer> splitOffsets = new ArrayList<>(2);
            int groups = breakMatcher.groupCount();
            for (int group = 1; group <= groups; group++) {
                // Groups from the non-matching alternative report -1 and are skipped, as is
                // a group starting at offset 0 (no split before the first character).
                int offset = breakMatcher.start(group);
                if (offset > 0) {
                    splitOffsets.add(offset);
                }
            }
            workingUtterance.splitLast(splitOffsets);
            return;
        }

        // Contractions take precedence; possessives are only considered when none is found.
        int mergeCount = CONTRACTIONS.contractionSize(workingUtterance);
        if (mergeCount <= 0) {
            mergeCount = possessiveSize(workingUtterance);
        }
        if (mergeCount > 0) {
            workingUtterance.mergeLast(mergeCount);
        }
    }

    /**
     * @return the version number of this tokenizer implementation
     */
    @Override
    public int version() {
        return CURRENT_VERSION;
    }
}
