package org.luwrain.nlp.ru;

import java.util.ArrayList;
import java.util.List;
import org.luwrain.nlp.ru.Token;

/* loaded from: input_file:org/luwrain/nlp/ru/AbstractTokenizer.class */
/**
 * Splits a character stream into {@link Token}s of five kinds: runs of decimal
 * digits, runs of basic Latin letters, runs of Russian Cyrillic letters, runs
 * of whitespace, and single "punctuation" characters (anything that falls into
 * none of the other four classes).
 *
 * <p>The character source is supplied by subclasses through {@link #getCh()},
 * {@link #hasCh()} and {@link #backCh(char)}. Recognized tokens are appended to
 * {@link #output} and can be retrieved with {@link #getOutput()}.
 */
public abstract class AbstractTokenizer {
    // Russian alphabet boundaries, written as escapes so the source does not
    // depend on the file encoding.
    private static final char CYR_UPPER_FIRST = '\u0410'; // U+0410, was 1040
    private static final char CYR_UPPER_LAST = '\u042F';  // U+042F, was 1071
    private static final char CYR_LOWER_FIRST = '\u0430'; // U+0430, was 1072
    private static final char CYR_LOWER_LAST = '\u044F';  // U+044F, was 1103
    // The letter "yo" lies outside both contiguous ranges above.
    private static final char CYR_UPPER_YO = '\u0401';    // U+0401, was 1025
    private static final char CYR_LOWER_YO = '\u0451';    // U+0451, was 1105

    /** Tokens recognized so far, in the order they were read. */
    protected final List<Token> output = new ArrayList<>();

    /**
     * Returns the next character of the input.
     *
     * <p>The tokenizer only calls this after {@link #hasCh()} has returned
     * {@code true}. Note that this method is package-private, so it can only be
     * implemented by subclasses located in the same package.
     *
     * @return the next input character
     */
    abstract char getCh();

    /**
     * Tells whether at least one more character can be obtained with
     * {@link #getCh()}.
     *
     * @return {@code true} if more input is available
     */
    public abstract boolean hasCh();

    /**
     * Hands back a character that was read but does not belong to the token
     * being built.
     *
     * <p>NOTE(review): the tokenizer relies on the character becoming available
     * again through a later {@link #getCh()} call; that contract is implied by
     * the way this method is used here and must be honoured by implementations.
     *
     * @param c the character to return to the input
     */
    public abstract void backCh(char c);

    /**
     * Consumes all remaining input and appends the resulting tokens to
     * {@link #output}.
     *
     * <p>Consecutive characters of the same class (digits, Latin letters,
     * Cyrillic letters, whitespace) are merged into a single token. Every
     * punctuation character produces a token of its own.
     */
    public void tokenize() {
        while (hasCh()) {
            final char first = getCh();
            final Token.Type type = classify(first);
            if (type == Token.Type.PUNC) {
                // Punctuation is never merged and needs no look-ahead.
                this.output.add(new Token(type, String.valueOf(first)));
            } else {
                this.output.add(new Token(type, readRun(first, type)));
            }
        }
    }

    /**
     * Collects the maximal run of characters that share the class of
     * {@code first}.
     *
     * @param first the already consumed character that starts the run
     * @param type the class of {@code first} as returned by {@link #classify(char)}
     * @return the text of the run, always at least one character long
     */
    private String readRun(char first, Token.Type type) {
        final StringBuilder run = new StringBuilder();
        run.append(first);
        while (hasCh()) {
            final char next = getCh();
            if (classify(next) != type) {
                // The character starts a different token; return it to the input.
                backCh(next);
                break;
            }
            run.append(next);
        }
        return run.toString();
    }

    /**
     * Maps a character to the type of the token it belongs to.
     *
     * <p>The character classes are mutually exclusive, so the order of the
     * checks does not affect the result.
     *
     * @param ch the character to classify
     * @return the token type for {@code ch}; {@code PUNC} when no other class matches
     */
    private static Token.Type classify(char ch) {
        if (ch >= '0' && ch <= '9') {
            return Token.Type.NUM;
        }
        if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
            return Token.Type.LATIN;
        }
        if (isCyrillic(ch)) {
            return Token.Type.CYRIL;
        }
        if (Character.isWhitespace(ch)) {
            return Token.Type.SPACE;
        }
        return Token.Type.PUNC;
    }

    /**
     * Tells whether the character is a letter of the Russian alphabet
     * (either case, including the letter "yo").
     *
     * @param ch the character to test
     * @return {@code true} for a Russian Cyrillic letter
     */
    private static boolean isCyrillic(char ch) {
        return (ch >= CYR_LOWER_FIRST && ch <= CYR_LOWER_LAST)
                || (ch >= CYR_UPPER_FIRST && ch <= CYR_UPPER_LAST)
                || ch == CYR_LOWER_YO
                || ch == CYR_UPPER_YO;
    }

    /**
     * Returns the tokens recognized so far.
     *
     * @return a newly allocated array holding the content of {@link #output}
     *         in recognition order; empty if nothing has been tokenized
     */
    public Token[] getOutput() {
        return this.output.toArray(new Token[this.output.size()]);
    }
}
