package ai.promethist.text;

import ai.promethist.type.MaskingText;
import ai.promethist.type.Text;
import ai.promethist.type.Token;
import ai.promethist.util.Logger;
import ai.promethist.util.LoggerDelegate;
import com.azure.core.util.tracing.Tracer;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import kotlin.Metadata;
import kotlin.Pair;
import kotlin.Triple;
import kotlin.collections.CollectionsKt;
import kotlin.io.TextStreamsKt;
import kotlin.jvm.functions.Function1;
import kotlin.jvm.functions.Function2;
import kotlin.jvm.internal.Intrinsics;
import kotlin.jvm.internal.PropertyReference1Impl;
import kotlin.jvm.internal.Reflection;
import kotlin.reflect.KProperty;
import kotlin.text.MatchResult;
import kotlin.text.Regex;
import kotlin.text.RegexOption;
import kotlin.text.StringsKt;
import org.jetbrains.annotations.NotNull;
import org.reactivestreams.Publisher;
import org.springframework.cglib.core.Constants;
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;

/* compiled from: TextSplitter.kt */
@Metadata(mv = {2, 0, 0}, k = 1, xi = 48, d1 = {"��V\n\u0002\u0018\u0002\n\u0002\u0010��\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\b\u0005\n\u0002\u0018\u0002\n\u0002\b\t\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0010 \n\u0002\u0010\u000e\n\u0002\b\u000b\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n��\n\u0002\u0010\f\n��\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\b\u0006\bÆ\u0002\u0018��2\u00020\u0001B\t\b\u0002¢\u0006\u0004\b\u0002\u0010\u0003J\u0010\u0010#\u001a\u00020\u001a2\u0006\u0010$\u001a\u00020\u001aH\u0002J*\u0010%\u001a\b\u0012\u0004\u0012\u00020'0&2\f\u0010(\u001a\b\u0012\u0004\u0012\u00020)0&2\u000e\b\u0002\u0010*\u001a\b\u0012\u0004\u0012\u00020+0\u0019JO\u0010,\u001a\u0018\u0012\b\u0012\u00060/j\u0002`.\u0012\u0004\u0012\u00020\u001a\u0012\u0004\u0012\u00020\u001a0-2\n\u00100\u001a\u00060/j\u0002`.2\u0006\u00101\u001a\u00020)2\b\u00102\u001a\u0004\u0018\u00010)2\f\u0010*\u001a\b\u0012\u0004\u0012\u00020+0\u0019H\u0002¢\u0006\u0002\u00103J\u0014\u00104\u001a\b\u0012\u0004\u0012\u00020\u001a0\u00192\u0006\u0010$\u001a\u00020\u001aR\u001b\u0010\u0004\u001a\u00020\u00058BX\u0082\u0084\u0002¢\u0006\f\n\u0004\b\b\u0010\t\u001a\u0004\b\u0006\u0010\u0007R\u000e\u0010\n\u001a\u00020\u000bX\u0082\u0004¢\u0006\u0002\n��R\u000e\u0010\f\u001a\u00020\u000bX\u0082\u0004¢\u0006\u0002\n��R\u000e\u0010\r\u001a\u00020\u000bX\u0082\u0004¢\u0006\u0002\n��R\u000e\u0010\u000e\u001a\u00020\u000bX\u0082\u0004¢\u0006\u0002\n��R\u000e\u0010\u000f\u001a\u00020\u000bX\u0082\u0004¢\u0006\u0002\n��R\u000e\u0010\u0010\u001a\u00020\u000bX\u0082\u0004¢\u0006\u0002\n��R\u000e\u0010\u0011\u001a\u00020\u000bX\u0082\u0004¢\u0006\u0002\n��R\u000e\u0010\u0012\u001a\u00020\u000bX\u0082\u0004¢\u0006\u0002\n��R\u000e\u0010\u0013\u001a\u00020\u000bX\u0082\u0004¢\u0006\u0002\n��R\u0018\u0010\u0014\u001a\n 
\u0016*\u0004\u0018\u00010\u00150\u0015X\u0082\u0004¢\u0006\u0004\n\u0002\u0010\u0017R\u0014\u0010\u0018\u001a\b\u0012\u0004\u0012\u00020\u001a0\u0019X\u0082\u0004¢\u0006\u0002\n��R\u000e\u0010\u001b\u001a\u00020\u000bX\u0082\u0004¢\u0006\u0002\n��R\u0018\u0010\u001c\u001a\n \u0016*\u0004\u0018\u00010\u00150\u0015X\u0082\u0004¢\u0006\u0004\n\u0002\u0010\u0017R\u0014\u0010\u001d\u001a\b\u0012\u0004\u0012\u00020\u001a0\u0019X\u0082\u0004¢\u0006\u0002\n��R\u000e\u0010\u001e\u001a\u00020\u000bX\u0082\u0004¢\u0006\u0002\n��R\u000e\u0010\u001f\u001a\u00020\u000bX\u0082\u0004¢\u0006\u0002\n��R\u000e\u0010 \u001a\u00020\u000bX\u0082\u0004¢\u0006\u0002\n��R\u000e\u0010!\u001a\u00020\u000bX\u0082\u0004¢\u0006\u0002\n��R\u000e\u0010\"\u001a\u00020\u000bX\u0082\u0004¢\u0006\u0002\n��¨\u00065"}, d2 = {"Lai/promethist/text/TextSplitter;", "", Constants.CONSTRUCTOR_NAME, "()V", "logger", "Lai/promethist/util/Logger;", "getLogger", "()Lai/promethist/util/Logger;", "logger$delegate", "Lai/promethist/util/LoggerDelegate;", "multipleDots", "Lkotlin/text/Regex;", "numberWithDotAndLowercaseWord", "wordWithDotAndLowercaseWord", "dateYearRegex", "shortDateRegex", "blankTokenRegex", "numberRegex", "initialsRegex", "urlRegex", "abbreviationsInputStream", "Ljava/io/InputStream;", "kotlin.jvm.PlatformType", "Ljava/io/InputStream;", "abbreviationsExamples", "", "", "abbreviationsRegex", "monthsInputStream", "monthsExamples", "monthsRegex", "bracketText", "quotedText", "hashtagPattern", "lastRealCharacterIsDot", "replaceNotSeparatingDots", "text", "segments", "Lreactor/core/publisher/Flux;", "Lai/promethist/type/Text;", "tokens", "Lai/promethist/type/Token;", "separators", "", "splitToSegments", "Lkotlin/Triple;", "Lkotlin/text/StringBuilder;", "Ljava/lang/StringBuilder;", Tracer.SPAN_BUILDER_KEY, "currentToken", "nextToken", "(Ljava/lang/StringBuilder;Lai/promethist/type/Token;Lai/promethist/type/Token;Ljava/util/List;)Lkotlin/Triple;", "splitTextToSentences", "promethist-common"})
/* loaded from: input_file:ai/promethist/text/TextSplitter.class */
public final class TextSplitter {

    // ---------------------------------------------------------------------------------------
    // Fields assigned in the static initializer block at the bottom of the class.
    // ---------------------------------------------------------------------------------------

    /** Lines of the {@code /abbreviations.txt} classpath resource, read as UTF-8 by the static initializer. */
    @NotNull
    private static final List<String> abbreviationsExamples;

    /** Case-insensitive alternation of every abbreviation line, each escaped and prefixed with {@code \b}. */
    @NotNull
    private static final Regex abbreviationsRegex;
    /** Stream for the {@code /months.txt} classpath resource; opened by the static initializer. */
    private static final InputStream monthsInputStream;

    /** Lines of the {@code /months.txt} classpath resource, read as UTF-8 by the static initializer. */
    @NotNull
    private static final List<String> monthsExamples;

    /** Case-insensitive pattern: digits, a dot, optional whitespace, then one of the month lines as a whole word. */
    @NotNull
    private static final Regex monthsRegex;

    /** Matches a parenthesised span: {@code (} ... up to the first {@code )}. */
    @NotNull
    private static final Regex bracketText;

    /** Matches a double-quoted span up to the next double quote. */
    @NotNull
    private static final Regex quotedText;

    /** Matches {@code #} followed by word characters. Not referenced by any decompiled method visible in this file. */
    @NotNull
    private static final Regex hashtagPattern;

    /** Matches a placeholder character {@code ∯} followed only by whitespace at the end of the input. Not referenced by any decompiled method visible in this file. */
    @NotNull
    private static final Regex lastRealCharacterIsDot;
    // Compiler-generated metadata for the delegated `logger` property; read by getLogger().
    static final /* synthetic */ KProperty<Object>[] $$delegatedProperties = {Reflection.property1(new PropertyReference1Impl(TextSplitter.class, "logger", "getLogger()Lai/promethist/util/Logger;", 0))};

    // ---------------------------------------------------------------------------------------
    // Fields initialised inline. Static initialisers run in textual order, so INSTANCE must
    // stay above abbreviationsInputStream, whose initialiser dereferences it.
    // ---------------------------------------------------------------------------------------

    /** The single instance of this class (the constructor is private). */
    @NotNull
    public static final TextSplitter INSTANCE = new TextSplitter();

    /** Delegate that supplies the logger returned by getLogger(). */
    @NotNull
    private static final LoggerDelegate logger$delegate = new LoggerDelegate();

    /** Two or more consecutive dots; group 1 captures every dot except the last one. */
    @NotNull
    private static final Regex multipleDots = new Regex("([.]+)[.]");

    /** Digits, a dot, whitespace, then a run of lowercase (including accented) letters. */
    @NotNull
    private static final Regex numberWithDotAndLowercaseWord = new Regex("(\\d+)\\.(\\s+[a-zàáâäæãåāçćčďèéêëěēėęìíîïīįłñńòóôöøōõßřśšťùúûüūÿýżźžà-ÿ]+)");

    /** Word characters, a dot, whitespace, then a run of lowercase (including accented) letters. */
    @NotNull
    private static final Regex wordWithDotAndLowercaseWord = new Regex("(\\w+)\\.(\\s+[a-zàáâäæãåāçćčďèéêëěēėęìíîïīįłñńòóôöøōõßřśšťùúûüūÿýżźžà-ÿ]+)");

    /**
     * Two 1-2 digit numbers each followed by a dot (or the placeholder {@code ∯}), then an
     * apostrophe, an optionally apostrophe-prefixed 2-digit number, or a 4-digit number.
     */
    @NotNull
    private static final Regex dateYearRegex = new Regex("\\b(\\d{1,2})[.∯](\\s?\\d{1,2})[.∯](\\s*)([']|[']?\\d{2}|\\d{4})");

    /** A 1-2 digit number, a dot (or the placeholder {@code ∯}), optional whitespace character, and another 1-2 digit number. */
    @NotNull
    private static final Regex shortDateRegex = new Regex("\\b(\\d{1,2})[.∯](\\s?\\d{1,2})");

    /** Input consisting only of whitespace. Not referenced by any decompiled method visible in this file. */
    @NotNull
    private static final Regex blankTokenRegex = new Regex("^\\s+$");

    /** Digits, a dot, digits (e.g. a decimal number). */
    @NotNull
    private static final Regex numberRegex = new Regex("(\\d+)\\.(\\d+)");

    /**
     * A single letter at a word boundary followed by a dot.
     * NOTE(review): the trailing range {@code À-Ÿ} spans U+00C0..U+0178, which also contains
     * lowercase letters and the symbols × and ÷ — confirm that this is intended.
     */
    @NotNull
    private static final Regex initialsRegex = new Regex("\\b[A-ZÀÁÂÄÆÃÅĀÇĆČĎÈÉÊËĚĒĖĘÌÍÎÏĪĮŁÑŃÒÓÔÖØŌÕŘŚŠŤÙÚÛÜŪŸÝŻŹŽÀ-Ÿ]\\.");

    /** Host-like text containing a dot followed by a short alphanumeric suffix, plus optional further dot-separated parts. */
    @NotNull
    private static final Regex urlRegex = new Regex("[a-zA-Z][-a-zA-Z0-9@:%._\\+~#=]{0,255}\\.[a-zA-Z][a-zA-Z0-9()]{0,5}\\b([.][-a-zA-Z0-9()@:%_\\+~#?&//=]+)*");
    /** Stream for the {@code /abbreviations.txt} classpath resource; consumed by the static initializer. */
    private static final InputStream abbreviationsInputStream = INSTANCE.getClass().getResourceAsStream("/abbreviations.txt");

    /** Private so that {@code INSTANCE} remains the only instance of this class. */
    private TextSplitter() {
        // Nothing to initialise: all state lives in static fields.
    }

    /**
     * Resolves the delegated {@code logger} property.
     *
     * @return the value supplied by {@code logger$delegate} for this instance
     */
    private final Logger getLogger() {
        KProperty<Object> loggerProperty = $$delegatedProperties[0];
        return logger$delegate.getValue((Object) this, loggerProperty);
    }

    /**
     * Replaces dots inside matches of the listed patterns with the placeholder character
     * {@code ∯}, then rewrites runs of consecutive dots so that only the last dot of each
     * run remains a real dot.
     *
     * @param str text to process
     * @return the text with those dots replaced by the placeholder
     */
    private final String replaceNotSeparatingDots(String str) {
        // The patterns are applied one after another, each on the previous result, so order matters.
        Regex[] protectedPatterns = {urlRegex, bracketText, quotedText, abbreviationsRegex, initialsRegex, monthsRegex, numberWithDotAndLowercaseWord, wordWithDotAndLowercaseWord, dateYearRegex, shortDateRegex, numberRegex};
        String masked = str;
        for (Regex pattern : protectedPatterns) {
            masked = replaceNotSeparatingDots$replaceDotsWithSpecialChar(masked, pattern);
        }
        return multipleDots.replace(masked, TextSplitter::replaceNotSeparatingDots$lambda$3);
    }

    /**
     * Turns a stream of tokens into a stream of text segments.
     *
     * <p>Pipeline, in order:
     * <ol>
     *   <li>each token is expanded by {@code segments$lambda$4} (a {@code MaskingText} token is
     *       followed by an extra empty token) and the resulting inner streams are flattened;</li>
     *   <li>{@code buffer(2, 1)} produces overlapping windows, which are converted to
     *       (current, next-or-null) pairs;</li>
     *   <li>{@code scan} threads a shared {@link StringBuilder} through
     *       {@code segments$lambda$10}, which calls {@code splitToSegments} for every pair;</li>
     *   <li>the third component of each scan state (the segments produced by that step) is
     *       flattened into the result.</li>
     * </ol>
     *
     * <p>The decompiled lambdas referred to undeclared register names ({@code r1}/{@code r2})
     * and an undeclared type variable ({@code Flux<V>}); the function objects are now passed
     * explicitly. Raw types are used because the bridge helpers are themselves raw.
     *
     * @param tokens     source tokens; must not be null
     * @param separators separator characters forwarded to {@code splitToSegments}; must not be null
     * @return the stream of produced segments
     */
    @NotNull
    @SuppressWarnings({"rawtypes", "unchecked"})
    public final Flux<Text> segments(@NotNull Flux<Token> tokens, @NotNull List<Character> separators) {
        Intrinsics.checkNotNullParameter(tokens, "tokens");
        Intrinsics.checkNotNullParameter(separators, "separators");

        // 1. Expand every token into its own small Flux, then flatten.
        Function1<Token, Flux> expandToken = TextSplitter::segments$lambda$4;
        Flux expanded = tokens.map(token -> segments$lambda$5(expandToken, token));
        Function1<Flux, Publisher> identity = TextSplitter::segments$lambda$6;
        Flux windows = expanded.flatMap(inner -> segments$lambda$7(identity, inner)).buffer(2, 1);

        // 2. Convert each window into a (current, next-or-null) pair, preserving order.
        Function1<List, Publisher> toPair = TextSplitter::segments$lambda$8;
        Flux tokenPairs = windows.concatMap(window -> segments$lambda$9(toPair, window));

        // 3. Fold the pairs; the state is (builder, last segment, segments emitted by this step).
        Triple seed = new Triple(new StringBuilder(), "", CollectionsKt.emptyList());
        Function2<Triple, Pair, Triple> accumulate = (state, pair) -> segments$lambda$10(separators, state, pair);
        Flux states = tokenPairs.scan(seed, (state, pair) -> segments$lambda$11(accumulate, (Triple) state, pair));

        // 4. Emit the segments collected by each step.
        Function1<Triple, Iterable> emittedSegments = TextSplitter::segments$lambda$12;
        Flux<Text> flatMapIterable = states.flatMapIterable(state -> segments$lambda$13(emittedSegments, state));
        Intrinsics.checkNotNullExpressionValue(flatMapIterable, "flatMapIterable(...)");
        return flatMapIterable;
    }

    /**
     * Compiler-generated bridge for the default value of the {@code separators} parameter of
     * {@code segments}.
     *
     * @param textSplitter receiver on which {@code segments} is invoked
     * @param flux         the tokens argument
     * @param list         the separators argument; ignored when bit 2 of {@code i} is set
     * @param i            bit mask of defaulted parameters; bit value 2 selects the default separators
     * @param obj          unused marker argument
     * @return the result of {@code segments}
     */
    public static /* synthetic */ Flux segments$default(TextSplitter textSplitter, Flux flux, List list, int i, Object obj) {
        boolean useDefaultSeparators = (i & 2) != 0;
        List effectiveSeparators = useDefaultSeparators ? ValuesKt.getSegmentSeparators() : list;
        return textSplitter.segments(flux, effectiveSeparators);
    }

    /* JADX WARN: Code restructure failed: missing block: B:9:0x003c, code lost:
    
        if (r0 == null) goto L13;
     */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    /**
     * Decompiler placeholder: the real body of this method was not recovered, so in this
     * source form every call throws {@link UnsupportedOperationException}.
     *
     * <p>What the visible caller ({@code segments$lambda$10}) relies on: it passes the shared
     * builder, the current token, the next token (or {@code null}) and the separator list,
     * and reads the second component of the returned triple as the segment text.
     * NOTE(review): the meaning of the first and third triple components cannot be determined
     * from this file — consult the original {@code TextSplitter.kt} or the bytecode dump.
     *
     * @param r11 string builder shared across calls
     * @param r12 current token
     * @param r13 next token, or {@code null}
     * @param r14 separator characters
     * @return never returns normally in this decompiled form
     * @throws UnsupportedOperationException always
     */
    private final kotlin.Triple<java.lang.StringBuilder, java.lang.String, java.lang.String> splitToSegments(java.lang.StringBuilder r11, ai.promethist.type.Token r12, ai.promethist.type.Token r13, java.util.List<java.lang.Character> r14) {
        /*
            Method dump skipped, instructions count: 633
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: ai.promethist.text.TextSplitter.splitToSegments(java.lang.StringBuilder, ai.promethist.type.Token, ai.promethist.type.Token, java.util.List):kotlin.Triple");
    }

    /**
     * Splits a text into segments synchronously.
     *
     * <p>The text is wrapped in a single {@link Token}, passed through {@code segments} with
     * the default separators, and the text of every resulting segment is collected. The call
     * blocks the current thread until the stream completes.
     *
     * <p>The decompiled lambda referred to an undeclared register name ({@code r1}); the
     * mapper is now passed explicitly. The null-check on the result of {@code Flux.just}
     * was dropped because that factory never returns null.
     *
     * @param text the text to split; must not be null
     * @return the segment texts in emission order; an empty list if the collected list is null
     */
    @NotNull
    @SuppressWarnings("unchecked")
    public final List<String> splitTextToSentences(@NotNull String text) {
        Intrinsics.checkNotNullParameter(text, "text");
        Flux<Token> singleToken = Flux.just(new Token(text));
        // Mask value 2 asks segments$default to substitute the default separators.
        Flux<Text> segmentStream = segments$default(INSTANCE, singleToken, null, 2, null);
        Function1<Text, String> textOf = TextSplitter::splitTextToSentences$lambda$16;
        Flux<String> sentences = segmentStream.map(segment -> splitTextToSentences$lambda$17(textOf, segment));
        Intrinsics.checkNotNullExpressionValue(sentences, "map(...)");
        List<String> collected = sentences.collectList().block();
        return collected == null ? CollectionsKt.emptyList() : collected;
    }

    /**
     * Builds one alternative of the abbreviations pattern.
     *
     * @param it2 a raw abbreviation line
     * @return {@code \b} followed by the regex-escaped line
     */
    private static final CharSequence abbreviationsRegex$lambda$0(String it2) {
        Intrinsics.checkNotNullParameter(it2, "it");
        String escaped = Regex.Companion.escape(it2);
        return "\\b" + escaped;
    }

    /**
     * Builds one alternative of the months pattern.
     *
     * @param it2 a raw month line
     * @return the regex-escaped line wrapped in {@code \b} word boundaries on both sides
     */
    private static final CharSequence monthsRegex$lambda$1(String it2) {
        Intrinsics.checkNotNullParameter(it2, "it");
        String escaped = Regex.Companion.escape(it2);
        return "\\b" + escaped + "\\b";
    }

    /**
     * Replacement callback: swaps every dot in the matched text for the placeholder {@code ∯}.
     *
     * @param matchResult the current regex match
     * @return the matched text with all dots replaced
     */
    private static final CharSequence replaceNotSeparatingDots$replaceDotsWithSpecialChar$lambda$2(MatchResult matchResult) {
        Intrinsics.checkNotNullParameter(matchResult, "matchResult");
        String matched = matchResult.getValue();
        return StringsKt.replace$default(matched, ".", "∯", false, 4, (Object) null);
    }

    /**
     * Replaces the dots inside every match of {@code regex} with the placeholder {@code ∯}.
     *
     * @param str   text to process
     * @param regex pattern whose matches have their dots replaced
     * @return the processed text
     */
    private static final String replaceNotSeparatingDots$replaceDotsWithSpecialChar(String str, Regex regex) {
        String masked = regex.replace(str, TextSplitter::replaceNotSeparatingDots$replaceDotsWithSpecialChar$lambda$2);
        return masked;
    }

    /**
     * Replacement callback for runs of dots: one placeholder {@code ∯} per character of
     * capture group 1, followed by a single real dot.
     *
     * @param it2 the current regex match
     * @return the replacement text
     */
    private static final CharSequence replaceNotSeparatingDots$lambda$3(MatchResult it2) {
        Intrinsics.checkNotNullParameter(it2, "it");
        int leadingDots = it2.getGroupValues().get(1).length();
        String placeholders = StringsKt.repeat("∯", leadingDots);
        return placeholders + ".";
    }

    /**
     * Wraps a token in a stream; a {@code MaskingText} token is followed by an extra empty token.
     *
     * @param token the token to wrap
     * @return a stream of one or two tokens
     */
    private static final Flux segments$lambda$4(Token token) {
        if (token instanceof MaskingText) {
            return Flux.concat(Mono.just(token), Mono.just(new Token("")));
        }
        return Flux.just(token);
    }

    /**
     * Bridge that applies {@code tmp0} to {@code obj} and casts the result to {@link Flux}.
     * NOTE(review): {@code mo649invoke} looks like a decompiler-assigned alias of
     * {@code Function1.invoke} — confirm against the Kotlin stdlib in use.
     */
    private static final Flux segments$lambda$5(Function1 tmp0, Object obj) {
        Intrinsics.checkNotNullParameter(tmp0, "$tmp0");
        Object result = tmp0.mo649invoke(obj);
        return (Flux) result;
    }

    /** Identity mapping: returns the inner stream itself so that {@code flatMap} flattens it. */
    private static final Publisher segments$lambda$6(Flux flux) {
        Publisher inner = flux;
        return inner;
    }

    /** Bridge that applies {@code tmp0} to {@code obj} and casts the result to {@link Publisher}. */
    private static final Publisher segments$lambda$7(Function1 tmp0, Object obj) {
        Intrinsics.checkNotNullParameter(tmp0, "$tmp0");
        Object result = tmp0.mo649invoke(obj);
        return (Publisher) result;
    }

    /**
     * Converts a buffered window into a (current, next) pair; the next element is
     * {@code null} when the window holds only one token.
     *
     * @param list window of one or two tokens
     * @return a single-element publisher carrying the pair
     */
    private static final Publisher segments$lambda$8(List list) {
        Token current = (Token) list.get(0);
        Intrinsics.checkNotNull(list);
        Token next = (Token) CollectionsKt.getOrNull(list, 1);
        Pair tokenPair = new Pair(current, next);
        return Mono.just(tokenPair);
    }

    /** Bridge that applies {@code tmp0} to {@code obj} and casts the result to {@link Publisher}. */
    private static final Publisher segments$lambda$9(Function1 tmp0, Object obj) {
        Intrinsics.checkNotNullParameter(tmp0, "$tmp0");
        Object result = tmp0.mo649invoke(obj);
        return (Publisher) result;
    }

    /**
     * Scan accumulator: processes one (current, next) token pair against the shared builder.
     *
     * <p>The segment returned by {@code splitToSegments} is emitted unless it is empty or ends
     * with {@code '|'}. When there is no next token, {@code splitToSegments} is called
     * repeatedly with an empty token until the builder is empty or an empty segment comes back,
     * and every non-empty segment is emitted.
     *
     * @param separators separator characters forwarded to {@code splitToSegments}
     * @param triple     previous state; only its first component (the builder) is read
     * @param pair       (current token, next token or {@code null})
     * @return new state: (builder, segment of the first call, segments emitted by this step)
     */
    private static final Triple segments$lambda$10(List separators, Triple triple, Pair pair) {
        Intrinsics.checkNotNullParameter(separators, "$separators");
        Token current = (Token) pair.component1();
        Token upcoming = (Token) pair.component2();
        StringBuilder buffer = (StringBuilder) triple.component1();
        Intrinsics.checkNotNull(current);

        Triple<StringBuilder, String, String> firstSplit = INSTANCE.splitToSegments(buffer, current, upcoming, separators);
        String segment = firstSplit.getSecond();
        ArrayList emitted = new ArrayList();
        boolean endsWithPipe = segment.length() > 0 && StringsKt.endsWith$default((CharSequence) segment, '|', false, 2, (Object) null);
        if (segment.length() > 0 && !endsWithPipe) {
            emitted.add(new Text(segment));
        }

        if (upcoming == null) {
            // End of input: drain whatever is still held in the builder.
            while (buffer.length() > 0) {
                String remainder = INSTANCE.splitToSegments(buffer, new Token(""), null, separators).getSecond();
                if (remainder.length() == 0) {
                    break;
                }
                emitted.add(new Text(remainder));
            }
        }
        return new Triple(buffer, segment, emitted);
    }

    /** Bridge that applies the two-argument function {@code tmp0} to the state and the next element. */
    private static final Triple segments$lambda$11(Function2 tmp0, Triple triple, Object obj) {
        Intrinsics.checkNotNullParameter(tmp0, "$tmp0");
        Object nextState = tmp0.invoke(triple, obj);
        return (Triple) nextState;
    }

    /** Extracts the third component of a scan state, i.e. the segments emitted by that step. */
    private static final Iterable segments$lambda$12(Triple triple) {
        Object emitted = triple.getThird();
        return (Iterable) emitted;
    }

    /** Bridge that applies {@code tmp0} to {@code obj} and casts the result to {@link Iterable}. */
    private static final Iterable segments$lambda$13(Function1 tmp0, Object obj) {
        Intrinsics.checkNotNullParameter(tmp0, "$tmp0");
        Object result = tmp0.mo649invoke(obj);
        return (Iterable) result;
    }

    /**
     * Converts a single character to its one-character string form.
     *
     * @param c the character
     * @return a string containing only {@code c}
     */
    private static final CharSequence splitToSegments$lambda$14(char c) {
        return Character.toString(c);
    }

    /**
     * Replacement callback: turns every placeholder {@code ∯} in the matched text back into a dot.
     *
     * @param it2 the current regex match
     * @return the matched text with placeholders replaced by dots
     */
    private static final CharSequence splitToSegments$lambda$15(MatchResult it2) {
        Intrinsics.checkNotNullParameter(it2, "it");
        String matched = it2.getValue();
        return StringsKt.replace$default(matched, "∯", ".", false, 4, (Object) null);
    }

    /** Extracts the string content of a {@link Text} segment. */
    private static final String splitTextToSentences$lambda$16(Text text) {
        String content = text.getText();
        return content;
    }

    /** Bridge that applies {@code tmp0} to {@code obj} and casts the result to {@link String}. */
    private static final String splitTextToSentences$lambda$17(Function1 tmp0, Object obj) {
        Intrinsics.checkNotNullParameter(tmp0, "$tmp0");
        Object result = tmp0.mo649invoke(obj);
        return (String) result;
    }

    // Loads the abbreviation and month word lists from the classpath and builds the patterns
    // that depend on them. Runs after all inline field initialisers above, so INSTANCE and
    // abbreviationsInputStream are already set. A missing resource fails the null-check and
    // therefore class initialisation.
    static {
        // Abbreviations: one entry per line of /abbreviations.txt.
        InputStream abbreviationsSource = abbreviationsInputStream;
        Intrinsics.checkNotNull(abbreviationsSource);
        BufferedReader abbreviationsReader = new BufferedReader(new InputStreamReader(abbreviationsSource, StandardCharsets.UTF_8), 8192);
        abbreviationsExamples = TextStreamsKt.readLines(abbreviationsReader);
        String abbreviationAlternatives = CollectionsKt.joinToString$default(abbreviationsExamples, "|", null, null, 0, null, TextSplitter::abbreviationsRegex$lambda$0, 30, null);
        abbreviationsRegex = new Regex(abbreviationAlternatives, RegexOption.IGNORE_CASE);

        // Months: one entry per line of /months.txt.
        monthsInputStream = INSTANCE.getClass().getResourceAsStream("/months.txt");
        InputStream monthsSource = monthsInputStream;
        Intrinsics.checkNotNull(monthsSource);
        BufferedReader monthsReader = new BufferedReader(new InputStreamReader(monthsSource, StandardCharsets.UTF_8), 8192);
        monthsExamples = TextStreamsKt.readLines(monthsReader);
        String monthAlternatives = CollectionsKt.joinToString$default(monthsExamples, "|", null, null, 0, null, TextSplitter::monthsRegex$lambda$1, 30, null);
        monthsRegex = new Regex("\\d+\\.\\s*(" + monthAlternatives + ")", RegexOption.IGNORE_CASE);

        // Fixed patterns.
        bracketText = new Regex("\\([^)]*\\)");
        quotedText = new Regex("\"[^\"]*\"");
        hashtagPattern = new Regex("#\\w+");
        lastRealCharacterIsDot = new Regex("∯\\s*$");
    }
}
