diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..ce589a9 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,4 @@ +messages.db +__pycache__/ +*.pyc +.env diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..1b7cb5c --- /dev/null +++ b/.env.example @@ -0,0 +1,2 @@ +TELEGRAM_TOKEN=your_telegram_bot_token_here +RANDOM_CHANCE=0.1 diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..695f948 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,21 @@ +FROM python:3.11-slim + +WORKDIR /app + +RUN apt-get update && apt-get install -y --no-install-recommends \ + gcc \ + && rm -rf /var/lib/apt/lists/* + +RUN python -m venv /opt/venv +ENV PATH="/opt/venv/bin:$PATH" + +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +RUN python -m spacy download de_core_news_sm + +COPY . . + +RUN mkdir -p /app/data + +CMD ["python", "bot.py"] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..3cef8a0 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index 1b17c14..a45d95e 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,47 @@ # ulfbot +Ein Telegram Bot, der mithilfe von Markov-Ketten und spaCy grammatikalisch richtige Nachrichten generiert. + +## Features + +- **Markov-Kette (Order 2-3)**: Generiert Sätze basierend auf den letzten 1000 Nachrichten +- **spaCy Integration**: Nutzt Tokenisierung und POS-Tagging für bessere Grammatik +- **Reaktionsmodi**: + - Auf Mention reagieren (`@botname`) + - Auf Replys reagieren + - Zufällig in 10% der Fälle (konfigurierbar) +- **Persistente Speicherung**: SQLite-Datenbank für Nachrichten +- **Chat-spezifisch**: Jeder Chat hat seine eigene Wissensbasis + +## Installation + +```bash +pip install -r requirements.txt +python -m spacy download de_core_news_sm +``` + +## Docker + +```bash +cp .env.example .env +# .env mit Token bearbeiten +docker-compose up -d --build +``` + +## Usage in Telegram + +1. Füge den Bot einer Gruppe hinzu +2. Sage etwas im Chat +3. Bot speichert die Nachricht automatisch +4. Reagiere auf eine Nachricht mit Reply → Bot antwortet +5. Oder mentioniere den Bot (`@botname`) → Bot antwortet +6. 
def is_admin_or_owner(update: Update) -> tuple[bool, str]:
    """Check whether the bot itself may operate in the chat of this update.

    Despite the name, the membership that is looked up belongs to the bot
    (``bot_id``), not to the user who sent the message.

    Args:
        update: Incoming Telegram update. ``update.message`` is dereferenced
            unconditionally, so the update must carry a message.

    Returns:
        ``(True, '')`` when the chat type is neither ``'group'`` nor
        ``'supergroup'``, or when the bot's status is ``'administrator'`` or
        ``'creator'`` and ``can_send_messages`` is truthy. Otherwise
        ``(False, reason)`` where ``reason`` is a German, user-facing text.
    """
    # Private chats and channels are always accepted without any lookup.
    if update.message.chat.type not in ['group', 'supergroup']:
        return True, ''

    # NOTE(review): chat_id and user_id are assigned but never read below.
    chat_id = update.message.chat_id
    user_id = update.message.from_user.id
    # This line sits outside the try block, so an exception raised here
    # propagates to the caller instead of becoming a (False, reason) result.
    # NOTE(review): with python-telegram-bot 20.7 (pinned in requirements.txt)
    # the bot is presumably reached via `update.get_bot()` or `context.bot`
    # rather than an `update.bot` attribute - confirm against the library.
    bot_id = update.bot.id

    try:
        # NOTE(review): this is a plain `def` and the call is not awaited. If
        # `get_member` is a coroutine function in the pinned library version,
        # `chat_member` is a coroutine object and the attribute reads below
        # fail, landing in the except branch - confirm and consider making
        # this helper async (callers would then need to await it).
        chat_member = update.message.chat.get_member(bot_id)
        status = chat_member.status
        # NOTE(review): verify that the returned member object really exposes
        # `permissions`; admin rights may live directly on the member object.
        permissions = chat_member.permissions

        if status not in ['administrator', 'creator']:
            return False, 'Ich benötige Admin-Rechte, um zu funktionieren.'

        if not permissions.can_send_messages:
            return False, 'Ich benötige die Berechtigung, Nachrichten zu senden.'

        return True, ''
    except Exception as e:
        # Any failure during the lookup is reported as a refusal whose reason
        # embeds the exception text.
        return False, f'Fehler beim Prüfen der Admin-Rechte: {str(e)}'
async def handle_message(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Store an incoming text message and trim the chat's history.

    Args:
        update: Incoming Telegram update.
        context: Handler context supplied by the framework (unused).

    Updates without a new message, without text, or without a sender are
    ignored silently instead of raising ``AttributeError``.
    """
    message = update.message
    # A text filter can also match updates that carry no *new* message
    # (e.g. edited messages or channel posts); `update.message` is None then.
    if message is None or not message.text:
        return

    sender = message.from_user
    # The messages table requires a user id, so posts without a sender
    # cannot be stored.
    if sender is None:
        return

    chat_id = message.chat_id
    save_message(chat_id, sender.id, sender.username, message.text)

    # Keep only the newest Config.MAX_MESSAGES entries for this chat.
    cleanup_old_messages(chat_id, Config.MAX_MESSAGES)
def main():
    """Configure logging and storage, register all handlers, start polling.

    Raises:
        ValueError: If ``Config.TELEGRAM_TOKEN`` is empty or unset.
    """
    # The root logger defaults to WARNING; without this call the start-up
    # message below is silently dropped.
    logging.basicConfig(
        format='%(asctime)s %(levelname)s %(name)s: %(message)s',
        level=logging.INFO,
    )
    # The HTTP client used by python-telegram-bot logs every request URL at
    # INFO level, and that URL contains the bot token - keep it quiet.
    logging.getLogger('httpx').setLevel(logging.WARNING)

    init_db()

    if not Config.TELEGRAM_TOKEN:
        raise ValueError('TELEGRAM_TOKEN nicht gesetzt in Environment')

    application = Application.builder().token(Config.TELEGRAM_TOKEN).build()

    plain_text = filters.TEXT & ~filters.COMMAND

    application.add_handler(CommandHandler('start', start))
    application.add_handler(CommandHandler('stats', stats))

    # Within one handler group only the first matching handler runs. The
    # three message handlers therefore each get their own group; otherwise
    # replies would never be stored and random_response would never fire.
    # Groups are evaluated in ascending order, so the message is saved
    # before any answer is generated from the history.
    application.add_handler(MessageHandler(plain_text, handle_message), group=1)
    # handle_mention decides itself between "is a reply" and "contains
    # @botname", so it must see every text message, not only replies.
    application.add_handler(MessageHandler(plain_text, handle_mention), group=2)
    # Independent of the above: a reply/mention may additionally trigger the
    # random answer with probability Config.RANDOM_CHANCE.
    application.add_handler(MessageHandler(plain_text, random_response), group=3)

    logging.info('Bot startet...')
    application.run_polling()
def cleanup_old_messages(chat_id: int, keep_count: int = 1000):
    """Delete all but the newest ``keep_count`` messages of one chat.

    Args:
        chat_id: Chat whose history is trimmed. Rows of other chats are
            never touched.
        keep_count: Number of most recent messages of ``chat_id`` to keep.
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        # `with conn` commits on success and rolls back on error.
        with conn:
            # The outer `chat_id = ?` is essential: without it every row
            # that is not among this chat's newest ones - i.e. the complete
            # history of all other chats - would be deleted as well.
            # `id DESC` breaks ties between rows sharing the same
            # second-resolution CURRENT_TIMESTAMP value.
            conn.execute('''
                DELETE FROM messages
                WHERE chat_id = ?
                  AND id NOT IN (
                    SELECT id FROM messages
                    WHERE chat_id = ?
                    ORDER BY timestamp DESC, id DESC
                    LIMIT ?
                )
            ''', (chat_id, chat_id, keep_count))
    finally:
        # Release the connection even if the statement fails.
        conn.close()
def generate_markov_sentence(chain: dict, max_length: int = 30, start_length: int = 2) -> Optional[str]:
    """Generate one sentence by walking a Markov chain.

    Args:
        chain: Mapping from a tuple of consecutive tokens to the list of
            tokens observed after that tuple.
        max_length: Upper bound on the number of tokens appended while
            walking; a seed longer than this is returned unchanged.
        start_length: Kept for backward compatibility and no longer
            consulted. The lookup window is taken from the length of the
            chain's keys, because a window of any other size can never
            match a key.

    Returns:
        The generated tokens joined by single spaces, ending at the first
        ``.``, ``!`` or ``?`` token if one is produced, or ``None`` when
        ``chain`` is empty.
    """
    if not chain:
        return None

    terminators = ('.', '!', '?')

    # Prefer seeds that look like a sentence start. Slicing with [:1]
    # instead of indexing keeps an empty first token from raising.
    start_keys = [k for k in chain if k[0][:1].isupper() or k[0].isupper()]
    if not start_keys:
        start_keys = list(chain)

    seed = random.choice(start_keys)
    order = len(seed)
    words = list(seed)

    # The seed itself may already contain a sentence-ending token.
    for pos, word in enumerate(words):
        if word in terminators:
            return ' '.join(words[:pos + 1])

    while len(words) < max_length:
        # .get() does not insert missing keys into a defaultdict.
        successors = chain.get(tuple(words[-order:]))
        if not successors:
            break

        next_word = random.choice(successors)
        words.append(next_word)

        # Compare whole tokens: a substring test would also match '' and
        # would cut tokens such as 'z.B.' or '3.14' apart.
        if next_word in terminators:
            break

    return ' '.join(words)