From f0c6f70b52b085e81b8bf83047f95e183df7e0fc Mon Sep 17 00:00:00 2001 From: ki Date: Sun, 15 Mar 2026 09:12:21 +0100 Subject: [PATCH] Code erstellt von OpenCode.ai --- .dockerignore | 4 ++ .env.example | 2 + Dockerfile | 21 ++++++ LICENSE | 21 ++++++ README.md | 45 +++++++++++++ bot.py | 151 ++++++++++++++++++++++++++++++++++++++++++++ config.py | 7 ++ database.py | 76 ++++++++++++++++++++++ docker-compose.yaml | 18 ++++++ markov.py | 72 +++++++++++++++++++++ requirements.txt | 2 + 11 files changed, 419 insertions(+) create mode 100644 .dockerignore create mode 100644 .env.example create mode 100644 Dockerfile create mode 100644 LICENSE create mode 100644 bot.py create mode 100644 config.py create mode 100644 database.py create mode 100644 docker-compose.yaml create mode 100644 markov.py create mode 100644 requirements.txt diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..ce589a9 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,4 @@ +messages.db +__pycache__/ +*.pyc +.env diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..1b7cb5c --- /dev/null +++ b/.env.example @@ -0,0 +1,2 @@ +TELEGRAM_TOKEN=your_telegram_bot_token_here +RANDOM_CHANCE=0.1 diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..695f948 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,21 @@ +FROM python:3.11-slim + +WORKDIR /app + +RUN apt-get update && apt-get install -y --no-install-recommends \ + gcc \ + && rm -rf /var/lib/apt/lists/* + +RUN python -m venv /opt/venv +ENV PATH="/opt/venv/bin:$PATH" + +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +RUN python -m spacy download de_core_news_sm + +COPY . . 
+ +RUN mkdir -p /app/data + +CMD ["python", "bot.py"] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..3cef8a0 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index 1b17c14..a45d95e 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,47 @@ # ulfbot +Ein Telegram Bot, der mithilfe von Markov-Ketten und spaCy grammatikalisch richtige Nachrichten generiert. 
+ +## Features + +- **Markov-Kette (Order 2-3)**: Generiert Sätze basierend auf den letzten 1000 Nachrichten +- **spaCy Integration**: Nutzt Tokenisierung und POS-Tagging für bessere Grammatik +- **Reaktionsmodi**: + - Auf Mention reagieren (`@botname`) + - Auf Replys reagieren + - Zufällig in 10% der Fälle (konfigurierbar) +- **Persistente Speicherung**: SQLite-Datenbank für Nachrichten +- **Chat-spezifisch**: Jeder Chat hat seine eigene Wissensbasis + +## Installation + +```bash +pip install -r requirements.txt +python -m spacy download de_core_news_sm +``` + +## Docker + +```bash +cp .env.example .env +# .env mit Token bearbeiten +docker-compose up -d --build +``` + +## Usage in Telegram + +1. Füge den Bot einer Gruppe hinzu +2. Sage etwas im Chat +3. Bot speichert die Nachricht automatisch +4. Reagiere auf eine Nachricht mit Reply → Bot antwortet +5. Oder mentioniere den Bot (`@botname`) → Bot antwortet +6. Oder warte zufällig (10% Chance pro Nachricht) → Bot antwortet + +## Commands + +- `/start` - Start Nachricht +- `/stats` - Anzahl gespeicherter Nachrichten im aktuellen Chat + +## Lizenz + +MIT diff --git a/bot.py b/bot.py new file mode 100644 index 0000000..60604f5 --- /dev/null +++ b/bot.py @@ -0,0 +1,151 @@ +import logging +import random +from telegram import Update +from telegram.ext import ( + Application, + ContextTypes, + MessageHandler, + filters, + CommandHandler, +) +import sqlite3 + +from config import Config +from database import init_db, save_message, get_recent_messages, cleanup_old_messages, DB_PATH +from markov import build_markov_chain, generate_markov_sentence + + +def is_admin_or_owner(update: Update) -> tuple[bool, str]: + if update.message.chat.type not in ['group', 'supergroup']: + return True, '' + + chat_id = update.message.chat_id + user_id = update.message.from_user.id + bot_id = update.bot.id + + try: + chat_member = update.message.chat.get_member(bot_id) + status = chat_member.status + permissions = chat_member.permissions + + if 
async def handle_message(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Store an incoming text message and trim the chat's stored history.

    Args:
        update: The incoming Telegram update.
        context: Handler context supplied by the application (unused here).

    Returns:
        None. The update is ignored when it carries no message, no text or
        no sender, because nothing meaningful could be stored for it.
    """
    message = update.message
    # ``update.message`` is only populated for new messages; other update
    # kinds (for example edited messages) would otherwise crash the handler
    # with an AttributeError on the attribute accesses below.
    if message is None:
        return

    text = message.text or ''
    if not text:
        return

    sender = message.from_user
    # The ``user_id`` column is declared NOT NULL, so a message without a
    # sender cannot be stored.
    if sender is None:
        return

    chat_id = message.chat_id
    save_message(chat_id, sender.id, sender.username, text)

    # Keep only the newest Config.MAX_MESSAGES rows for this chat.
    cleanup_old_messages(chat_id, Config.MAX_MESSAGES)
def main():
    """Initialise storage, register all handlers and poll Telegram for updates.

    Raises:
        ValueError: If ``Config.TELEGRAM_TOKEN`` is empty or unset.
    """
    # The root logger defaults to WARNING; configure it explicitly so the
    # INFO start-up message below is actually emitted.
    logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.INFO,
    )

    init_db()

    if not Config.TELEGRAM_TOKEN:
        raise ValueError('TELEGRAM_TOKEN nicht gesetzt in Environment')

    application = Application.builder().token(Config.TELEGRAM_TOKEN).build()

    # Only new, non-command text messages are of interest. Restricting the
    # update type keeps edited messages and channel posts (for which
    # ``update.message`` is None) away from the handlers.
    new_text = filters.UpdateType.MESSAGE & filters.TEXT & ~filters.COMMAND

    application.add_handler(CommandHandler('start', start))
    application.add_handler(CommandHandler('stats', stats))

    # Within one handler group only the first matching handler is invoked.
    # Each message handler therefore gets its own group so that storing,
    # answering replies/mentions and the random answer can all run for the
    # same update, in this order.
    application.add_handler(MessageHandler(new_text, handle_message), group=1)
    application.add_handler(MessageHandler(new_text, handle_mention), group=2)
    # Replies are already answered by handle_mention; excluding them here
    # avoids sending two generated answers to the same reply.
    # NOTE(review): a plain @mention can still receive a second, random
    # answer with probability Config.RANDOM_CHANCE.
    application.add_handler(
        MessageHandler(new_text & ~filters.REPLY, random_response),
        group=3,
    )

    logging.info('Bot startet...')
    application.run_polling()
def cleanup_old_messages(chat_id: int, keep_count: int = 1000):
    """Delete all but the newest ``keep_count`` messages of a single chat.

    Only rows belonging to ``chat_id`` are ever removed; messages stored for
    other chats are left untouched.

    Args:
        chat_id: Telegram chat whose history should be trimmed.
        keep_count: Number of most recent messages to keep for that chat.
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        # ``with conn`` commits on success and rolls back on error.
        with conn:
            conn.execute('''
                DELETE FROM messages
                WHERE chat_id = ?
                  AND id NOT IN (
                    SELECT id FROM messages
                    WHERE chat_id = ?
                    ORDER BY timestamp DESC, id DESC
                    LIMIT ?
                )
            ''', (chat_id, chat_id, keep_count))
            # The outer ``chat_id = ?`` is essential: without it every row of
            # every *other* chat is outside the sub-select and gets deleted.
            # ``id DESC`` breaks ties between rows that share the same
            # second-resolution CURRENT_TIMESTAMP value.
    finally:
        # Always release the connection, even if the statement fails.
        conn.close()
def process_texts_for_markov(texts: list[str], order: int = 2) -> dict:
    """Build a Markov chain from ``texts``.

    Thin convenience wrapper that forwards both arguments to
    ``build_markov_chain`` and returns its result unchanged.

    Args:
        texts: Raw message texts to learn from.
        order: Number of preceding tokens that form one chain key.

    Returns:
        The chain produced by ``build_markov_chain``.
    """
    return build_markov_chain(texts, order)