import random
from collections import defaultdict
from typing import Optional

try:
    import spacy
    # spacy.load raises OSError (not ImportError) when the package is
    # installed but the model data is missing, so catch both cases.
    nlp = spacy.load('de_core_news_sm')
except (ImportError, OSError):
    print("Spacy model not found. Please run: python -m spacy download de_core_news_sm")
    raise


def tokenize_text(text: str) -> list[str]:
    """Tokenize *text* with the German spaCy pipeline.

    Whitespace and punctuation tokens are dropped; only word tokens are
    returned, in document order.
    """
    doc = nlp(text)
    return [token.text for token in doc if not token.is_space and not token.is_punct]


def build_markov_chain(texts: list[str], order: int = 2) -> dict[tuple[str, ...], list[str]]:
    """Build an order-*order* Markov chain from a list of texts.

    Each key is a tuple of `order` consecutive tokens; the value is the
    list of tokens observed immediately after that tuple (duplicates kept,
    so random.choice later samples proportionally to frequency).

    Texts with fewer than `order + 1` tokens contribute nothing.
    """
    chain: dict[tuple[str, ...], list[str]] = defaultdict(list)
    for text in texts:
        tokens = tokenize_text(text)
        if len(tokens) <= order:
            continue  # too short to yield even one (key, next_word) pair
        for i in range(len(tokens) - order):
            key = tuple(tokens[i:i + order])
            chain[key].append(tokens[i + order])
    return chain


def generate_markov_sentence(chain: dict, max_length: int = 30, start_length: int = 2) -> Optional[str]:
    """Generate one sentence by walking *chain* from a random start key.

    Prefers start keys whose first token is capitalized (sentence-like
    openings). Generation stops at `max_length` words, at a dead end, or
    at sentence-final punctuation. Returns None for an empty chain.

    `start_length` is kept for backward compatibility; the actual key
    length is taken from the chain itself, since keys built by
    build_markov_chain always have length `order` and a mismatched
    `start_length` would make every lookup miss.
    """
    if not chain:
        return None

    # Prefer keys that look like sentence starts: first token capitalized
    # or fully uppercase. Guard against empty tokens before indexing [0].
    start_keys = [k for k in chain if k[0] and (k[0][0].isupper() or k[0].isupper())]
    if not start_keys:
        start_keys = list(chain.keys())

    current = random.choice(start_keys)
    words = list(current)

    # All keys in a chain share one length (the build order); use it for
    # stepping so generation works for any order, not just the default 2.
    key_length = len(current)

    while len(words) < max_length:
        key = tuple(words[-key_length:])
        if key not in chain:
            break  # dead end: this context was never observed
        next_word = random.choice(chain[key])
        words.append(next_word)
        if next_word in '.!?':
            break

    # Truncate at the first sentence-final punctuation mark, keeping it.
    sentence = ' '.join(words)
    for punct in '.!?':
        if punct in sentence:
            sentence = sentence.split(punct)[0] + punct
            break
    return sentence


def process_texts_for_markov(texts: list[str], order: int = 2) -> dict:
    """Convenience wrapper: build and return a Markov chain for *texts*."""
    return build_markov_chain(texts, order)