Code erstellt von OpenCode.ai

2026-03-15 09:12:21 +01:00
parent f3024c2594
commit f0c6f70b52
11 changed files with 419 additions and 0 deletions
--- a/.dockerignore
+++ b/.dockerignore
@@ -0,0 +1,4 @@
+messages.db
+__pycache__/
+*.pyc
+.env
+# Host directory mounted at /app/data by docker-compose.yaml; it holds the
+# live SQLite database and must not be copied into the image by `COPY . .`
+data/
--- a/.env.example
+++ b/.env.example
@@ -0,0 +1,2 @@
+TELEGRAM_TOKEN=your_telegram_bot_token_here
+RANDOM_CHANCE=0.1
--- a/Dockerfile
+++ b/Dockerfile
@@ -0,0 +1,21 @@
+# Image for the Telegram bot, based on the slim Python 3.11 image
+FROM python:3.11-slim
+
+WORKDIR /app
+
+# Install gcc and remove the apt package lists again within the same layer
+# (presumably needed to compile dependencies that ship no wheel - TODO confirm)
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    gcc \
+    && rm -rf /var/lib/apt/lists/*
+
+# Create a virtual environment and put it first on PATH so that the
+# `pip` and `python` calls below resolve to it
+RUN python -m venv /opt/venv
+ENV PATH="/opt/venv/bin:$PATH"
+
+# requirements.txt is copied on its own, before the rest of the sources
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# German spaCy model that markov.py loads at import time
+RUN python -m spacy download de_core_news_sm
+
+COPY . .
+
+# Directory that holds the SQLite file (DB_PATH in database.py)
+RUN mkdir -p /app/data
+
+CMD ["python", "bot.py"]
--- a/LICENSE
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2026 
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/README.md
+++ b/README.md
@@ -1,2 +1,47 @@
 # ulfbot

+Ein Telegram Bot, der mithilfe von Markov-Ketten und spaCy grammatikalisch richtige Nachrichten generiert.
+
+## Features
+
+- **Markov-Kette (Order 2)**: Generiert Sätze basierend auf den letzten 1000 Nachrichten des Chats
+- **spaCy Integration**: Nutzt Tokenisierung und POS-Tagging für bessere Grammatik
+- **Reaktionsmodi**:
+  - Auf Mention reagieren (`@botname`)
+  - Auf Antworten (Replies) reagieren
+  - Zufällig in 10% der Fälle (konfigurierbar)
+- **Persistente Speicherung**: SQLite-Datenbank für Nachrichten
+- **Chat-spezifisch**: Jeder Chat hat seine eigene Wissensbasis
+
+## Installation
+
+```bash
+pip install -r requirements.txt
+python -m spacy download de_core_news_sm
+```
+
+## Docker
+
+```bash
+cp .env.example .env
+# .env mit Token bearbeiten
+docker-compose up -d --build
+```
+
+## Usage in Telegram
+
+1. Füge den Bot einer Gruppe hinzu
+2. Sage etwas im Chat
+3. Bot speichert die Nachricht automatisch
+4. Reagiere auf eine Nachricht mit Reply → Bot antwortet
+5. Oder erwähne den Bot (`@botname`) → Bot antwortet
+6. Oder warte zufällig (10% Chance pro Nachricht) → Bot antwortet
+
+## Commands
+
+- `/start` - Startnachricht
+- `/stats` - Anzahl gespeicherter Nachrichten im aktuellen Chat
+
+## Lizenz
+
+MIT
--- a/bot.py
+++ b/bot.py
@@ -0,0 +1,151 @@
+import logging
+import random
+from telegram import Update
+from telegram.ext import (
+    Application,
+    ContextTypes,
+    MessageHandler,
+    filters,
+    CommandHandler,
+)
+import sqlite3
+
+from config import Config
+from database import init_db, save_message, get_recent_messages, cleanup_old_messages, DB_PATH
+from markov import build_markov_chain, generate_markov_sentence
+
+
+def is_admin_or_owner(update: Update) -> tuple[bool, str]:
+    if update.message.chat.type not in ['group', 'supergroup']:
+        return True, ''
+    
+    chat_id = update.message.chat_id
+    user_id = update.message.from_user.id
+    bot_id = update.bot.id
+    
+    try:
+        chat_member = update.message.chat.get_member(bot_id)
+        status = chat_member.status
+        permissions = chat_member.permissions
+        
+        if status not in ['administrator', 'creator']:
+            return False, 'Ich benötige Admin-Rechte, um zu funktionieren.'
+        
+        if not permissions.can_send_messages:
+            return False, 'Ich benötige die Berechtigung, Nachrichten zu senden.'
+        
+        return True, ''
+    except Exception as e:
+        return False, f'Fehler beim Prüfen der Admin-Rechte: {str(e)}'
+
+
+async def start(update: Update, context: ContextTypes.DEFAULT_TYPE):
+    is_admin, message = is_admin_or_owner(update)
+    
+    if not is_admin:
+        if message:
+            await update.message.reply_text(message)
+        return
+    
+    await update.message.reply_text(
+        'Hallo! Ich bin ein Markov-Bot. '
+        'Sage mir etwas und ich kann später etwas Ähnliches generieren.'
+    )
+
+
+async def handle_message(update: Update, context: ContextTypes.DEFAULT_TYPE):
+    message = update.message
+    chat_id = message.chat_id
+    user_id = message.from_user.id
+    username = message.from_user.username
+    text = message.text or ''
+    
+    if not text:
+        return
+    
+    save_message(chat_id, user_id, username, text)
+    
+    cleanup_old_messages(chat_id, Config.MAX_MESSAGES)
+
+
+async def generate_and_send(update: Update, context: ContextTypes.DEFAULT_TYPE):
+    chat_id = update.message.chat_id
+    
+    recent_texts = get_recent_messages(chat_id, Config.MAX_MESSAGES)
+    
+    if not recent_texts:
+        await update.message.reply_text('Noch keine Nachrichten zum Lernen.')
+        return
+    
+    chain = build_markov_chain(recent_texts, order=2)
+    
+    if not chain:
+        await update.message.reply_text('Nicht genug Daten für Markov-Kette.')
+        return
+    
+    sentence = generate_markov_sentence(chain)
+    
+    if sentence:
+        await update.message.reply_text(sentence)
+    else:
+        await update.message.reply_text('Konnte keinen Satz generieren.')
+
+
+async def handle_mention(update: Update, context: ContextTypes.DEFAULT_TYPE):
+    message = update.message
+    
+    if message.reply_to_message:
+        await generate_and_send(update, context)
+        return
+    
+    bot_username = context.bot.username
+    text = message.text or ''
+    
+    if f'@{bot_username}' in text:
+        await generate_and_send(update, context)
+
+
+async def random_response(update: Update, context: ContextTypes.DEFAULT_TYPE):
+    if random.random() < Config.RANDOM_CHANCE:
+        await generate_and_send(update, context)
+
+
+async def stats(update: Update, context: ContextTypes.DEFAULT_TYPE):
+    is_admin, message = is_admin_or_owner(update)
+    
+    if not is_admin:
+        if message:
+            await update.message.reply_text(message)
+        return
+    
+    chat_id = update.message.chat_id
+    
+    conn = sqlite3.connect(DB_PATH)
+    cursor = conn.cursor()
+    cursor.execute('SELECT COUNT(*) FROM messages WHERE chat_id = ?', (chat_id,))
+    count = cursor.fetchone()[0]
+    conn.close()
+    
+    await update.message.reply_text(f'Gespeicherte Nachrichten in diesem Chat: {count}')
+
+
+def main():
+    init_db()
+    
+    if not Config.TELEGRAM_TOKEN:
+        raise ValueError('TELEGRAM_TOKEN nicht gesetzt in Environment')
+    
+    application = Application.builder().token(Config.TELEGRAM_TOKEN).build()
+    
+    application.add_handler(CommandHandler('start', start))
+    application.add_handler(CommandHandler('stats', stats))
+    application.add_handler(MessageHandler(filters.TEXT & filters.REPLY, handle_mention))
+    application.add_handler(MessageHandler(filters.TEXT & (filters.ALL & ~filters.COMMAND), handle_message))
+    application.add_handler(MessageHandler(filters.TEXT & (filters.ALL & ~filters.COMMAND), random_response))
+    
+    logging.info('Bot startet...')
+    application.run_polling()
+
+
+if __name__ == '__main__':
+    main()
--- a/config.py
+++ b/config.py
@@ -0,0 +1,7 @@
+import os
+
+
+class Config:
+    TELEGRAM_TOKEN = os.getenv('TELEGRAM_TOKEN')
+    RANDOM_CHANCE = float(os.getenv('RANDOM_CHANCE', '0.1'))
+    MAX_MESSAGES = 1000
--- a/database.py
+++ b/database.py
@@ -0,0 +1,76 @@
+import sqlite3
+from typing import Optional
+
+
+# Absolute path of the SQLite database file. The Dockerfile creates /app/data
+# and docker-compose.yaml mounts the host directory ./data at that location.
+DB_PATH = '/app/data/messages.db'
+
+
+def init_db():
+    conn = sqlite3.connect(DB_PATH)
+    cursor = conn.cursor()
+    cursor.execute('''
+        CREATE TABLE IF NOT EXISTS messages (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            chat_id INTEGER NOT NULL,
+            user_id INTEGER NOT NULL,
+            username TEXT,
+            text TEXT NOT NULL,
+            timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
+        )
+    ''')
+    cursor.execute('CREATE INDEX IF NOT EXISTS idx_chat ON messages(chat_id)')
+    cursor.execute('CREATE INDEX IF NOT EXISTS idx_timestamp ON messages(timestamp)')
+    conn.commit()
+    conn.close()
+
+
+def save_message(chat_id: int, user_id: int, username: Optional[str], text: str):
+    conn = sqlite3.connect(DB_PATH)
+    cursor = conn.cursor()
+    cursor.execute('''
+        INSERT INTO messages (chat_id, user_id, username, text) VALUES (?, ?, ?, ?)
+    ''', (chat_id, user_id, username, text))
+    conn.commit()
+    conn.close()
+
+
+def get_recent_messages(chat_id: int, limit: int = 1000) -> list[str]:
+    conn = sqlite3.connect(DB_PATH)
+    cursor = conn.cursor()
+    cursor.execute('''
+        SELECT text FROM messages 
+        WHERE chat_id = ?
+        ORDER BY timestamp DESC LIMIT ?
+    ''', (chat_id, limit))
+    messages = [row[0] for row in cursor.fetchall()]
+    conn.close()
+    return messages
+
+
+def get_all_messages_for_chat(chat_id: int) -> list[tuple[str, str]]:
+    conn = sqlite3.connect(DB_PATH)
+    cursor = conn.cursor()
+    cursor.execute('''
+        SELECT username, text FROM messages 
+        WHERE chat_id = ?
+        ORDER BY timestamp DESC
+    ''', (chat_id,))
+    messages = cursor.fetchall()
+    conn.close()
+    return messages
+
+
+def cleanup_old_messages(chat_id: int, keep_count: int = 1000):
+    conn = sqlite3.connect(DB_PATH)
+    cursor = conn.cursor()
+    cursor.execute('''
+        DELETE FROM messages 
+        WHERE id NOT IN (
+            SELECT id FROM messages 
+            WHERE chat_id = ?
+            ORDER BY timestamp DESC
+            LIMIT ?
+        )
+    ''', (chat_id, keep_count))
+    conn.commit()
+    conn.close()
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -0,0 +1,18 @@
+# Compose definition that builds and runs the bot as a single service.
+version: '3.8'
+
+services:
+  ulfbot:
+    # Built from the Dockerfile in this directory
+    build: .
+    container_name: ulfbot
+    restart: unless-stopped
+    environment:
+      # Both values are substituted from the environment; the README has the
+      # user copy .env.example to .env and fill in the token
+      - TELEGRAM_TOKEN=${TELEGRAM_TOKEN}
+      # Falls back to 0.1 when RANDOM_CHANCE is not provided
+      - RANDOM_CHANCE=${RANDOM_CHANCE:-0.1}
+    volumes:
+      # /app/data contains the SQLite file (DB_PATH in database.py); mounting
+      # it from the host keeps the stored messages outside the container
+      - ./data:/app/data
+    networks:
+      - bot-network
+
+networks:
+  bot-network:
+    driver: bridge
--- a/markov.py
+++ b/markov.py
@@ -0,0 +1,72 @@
+import random
+from collections import defaultdict
+from typing import Optional
+
+try:
+    import spacy
+    nlp = spacy.load('de_core_news_sm')
+except ImportError:
+    print("Spacy model not found. Please run: python -m spacy download de_core_news_sm")
+    raise
+
+
+def tokenize_text(text: str) -> list[str]:
+    doc = nlp(text)
+    tokens = []
+    for token in doc:
+        if not token.is_space and not token.is_punct:
+            tokens.append(token.text)
+    return tokens
+
+
+def build_markov_chain(texts: list[str], order: int = 2) -> dict:
+    chain = defaultdict(list)
+    
+    for text in texts:
+        tokens = tokenize_text(text)
+        if len(tokens) <= order:
+            continue
+        
+        for i in range(len(tokens) - order):
+            key = tuple(tokens[i:i + order])
+            next_word = tokens[i + order]
+            chain[key].append(next_word)
+    
+    return chain
+
+
+def generate_markov_sentence(chain: dict, max_length: int = 30, start_length: int = 2) -> Optional[str]:
+    if not chain:
+        return None
+    
+    start_keys = [k for k in chain.keys() if k[0][0].isupper() or k[0].isupper()]
+    if not start_keys:
+        start_keys = list(chain.keys())
+    
+    current = random.choice(start_keys)
+    words = list(current)
+    
+    while len(words) < max_length:
+        key = tuple(words[-start_length:])
+        if key not in chain:
+            break
+        
+        next_word = random.choice(chain[key])
+        words.append(next_word)
+        
+        if next_word in '.!?':
+            break
+    
+    sentence = ' '.join(words)
+    
+    for punct in '.!?':
+        if punct in sentence:
+            sentence = sentence.split(punct)[0] + punct
+            break
+    
+    return sentence
+
+
+def process_texts_for_markov(texts: list[str], order: int = 2) -> dict:
+    chain = build_markov_chain(texts, order)
+    return chain
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+python-telegram-bot==20.7
+spacy==3.7.5