commit a37000e9a7eea5a7aaaee509a40723ad7a382153 Author: Adler Neves Date: Sat Feb 15 22:48:06 2020 -0300 Initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5a018fd --- /dev/null +++ b/.gitignore @@ -0,0 +1,13 @@ +**/*.pyc +**/__pycache__ +**/__pycache__/** +virtual_env +virtual_env/** +.vscode +.vscode/** +.idea +.idea/** +.atom +.atom/** +/telegrambot.txt +/db.sqlite3 \ No newline at end of file diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000..5db9fa9 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,4 @@ +deploy: + stage: deploy + script: + make deploy diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..0e9b0ab --- /dev/null +++ b/Makefile @@ -0,0 +1,44 @@ +help: + @echo -n + +deploy: + -sudo systemctl stop telegram-bot-use-proper-grammar.service + -sudo rm /etc/systemd/system/telegram-bot-use-proper-grammar.service + -sudo install telegram-bot-use-proper-grammar.service /etc/systemd/system + -sudo mkdir -p /var/www/telegram-bot-use-proper-grammar + -sudo rm -rf /var/www/telegram-bot-use-proper-grammar/propergrammar + -sudo rm -rf /var/www/telegram-bot-use-proper-grammar/Makefile + -sudo rm -rf /var/www/telegram-bot-use-proper-grammar/manage.py + -sudo cp -R propergrammar /var/www/telegram-bot-use-proper-grammar/propergrammar + -sudo install manage.py /var/www/telegram-bot-use-proper-grammar + -sudo install Makefile /var/www/telegram-bot-use-proper-grammar + -sudo install requirements.frozen.txt /var/www/telegram-bot-use-proper-grammar + sudo make depends -C /var/www/telegram-bot-use-proper-grammar + sudo make migrate -C /var/www/telegram-bot-use-proper-grammar + cd /var/www/telegram-bot-use-proper-grammar; sudo chown http:http -R . + sudo systemctl daemon-reload + sudo systemctl enable telegram-bot-use-proper-grammar.service + sudo systemctl restart telegram-bot-use-proper-grammar.service + +devmigrate: virtual_env + . virtual_env/bin/activate; python manage.py makemigrations + . 
virtual_env/bin/activate; python manage.py migrate + +migrate: virtual_env + . virtual_env/bin/activate; python manage.py migrate + +serve: virtual_env + . virtual_env/bin/activate; python -m propergrammar + +depends: virtual_env + . virtual_env/bin/activate; pip install -U -r requirements.frozen.txt + +depends-latest: virtual_env + . virtual_env/bin/activate; pip install -U -r requirements.txt + +freeze: virtual_env + . virtual_env/bin/activate; python -m pip freeze > requirements.frozen.txt + +virtual_env: + python3 -m virtualenv virtual_env + make depends diff --git a/README.md b/README.md new file mode 100644 index 0000000..07f94df --- /dev/null +++ b/README.md @@ -0,0 +1,17 @@ +Telegram - Use Proper Grammar, Please! +=========================== + +Sometimes we need a bot that repeats a set of messages over and over... more often than you're willing to copy-paste them. + +This bot exists for the people who need to be reminded more often than you're able to remind them. + +## Running locally + +- Create a bot with @BotFather +- Create the file `telegrambot.txt` containing the bot's API token +- Run `make serve` in a terminal + +## Deploying + +- Copy `telegrambot.txt` to `/var/www/telegram-bot-use-proper-grammar` +- Run `make deploy` diff --git a/manage.py b/manage.py new file mode 100755 index 0000000..acbcd7e --- /dev/null +++ b/manage.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python3 +# -*- encoding: utf-8 -*- + +import sys + +from propergrammar.djangosetup import setup_django + +if __name__ == '__main__': + setup_django() + from django.core.management import execute_from_command_line + execute_from_command_line(sys.argv) diff --git a/propergrammar/__init__.py b/propergrammar/__init__.py new file mode 100644 index 0000000..da98752 --- /dev/null +++ b/propergrammar/__init__.py @@ -0,0 +1,9 @@ +#!/usr/bin/env python3 +# -*- encoding: utf-8 -*- + +from pathlib import Path +from .telebot import start_bot + + +def main(): + start_bot(Path('telegrambot.txt').read_text().strip()) 
diff --git a/propergrammar/__main__.py b/propergrammar/__main__.py new file mode 100644 index 0000000..92a91cd --- /dev/null +++ b/propergrammar/__main__.py @@ -0,0 +1,9 @@ +#!/usr/bin/env python3 +# -*- encoding: utf-8 -*- + +from . import main +from .djangosetup import setup_django + +if __name__ == "__main__": + setup_django() + main() diff --git a/propergrammar/djangosetup.py b/propergrammar/djangosetup.py new file mode 100644 index 0000000..deb7e1c --- /dev/null +++ b/propergrammar/djangosetup.py @@ -0,0 +1,13 @@ +#!/usr/bin/env python3 +# -*- encoding: utf-8 -*- + +import os, sys + +def setup_django(): + # Setup environ + sys.path.append(os.getcwd()) + os.environ.setdefault("DJANGO_SETTINGS_MODULE", "propergrammar.settings") + + # Setup django + import django + django.setup() \ No newline at end of file diff --git a/propergrammar/migrations/0001_initial.py b/propergrammar/migrations/0001_initial.py new file mode 100644 index 0000000..7344916 --- /dev/null +++ b/propergrammar/migrations/0001_initial.py @@ -0,0 +1,47 @@ +# Generated by Django 3.0.3 on 2020-02-15 23:15 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ] + + operations = [ + migrations.CreateModel( + name='Group', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('chat_id', models.IntegerField(default=0, unique=True)), + ('name', models.CharField(default='', max_length=255)), + ], + ), + migrations.CreateModel( + name='GroupDictionaryEntry', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('word', models.TextField(default='')), + ('group', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='words', to='propergrammar.Group')), + ], + ), + migrations.AddIndex( + model_name='group', + index=models.Index(fields=['id'], 
name='propergramm_id_f9b41c_idx'), + ), + migrations.AddIndex( + model_name='group', + index=models.Index(fields=['chat_id'], name='propergramm_chat_id_c6909f_idx'), + ), + migrations.AddIndex( + model_name='groupdictionaryentry', + index=models.Index(fields=['id'], name='propergramm_id_1fdb42_idx'), + ), + migrations.AddIndex( + model_name='groupdictionaryentry', + index=models.Index(fields=['group_id'], name='propergramm_group_i_fd6c71_idx'), + ), + ] diff --git a/propergrammar/migrations/__init__.py b/propergrammar/migrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/propergrammar/models.py b/propergrammar/models.py new file mode 100644 index 0000000..3f1460a --- /dev/null +++ b/propergrammar/models.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python3 +# -*- encoding: utf-8 -*- + +from django.db import models + + +VALID_LANGUAGES_LST = 'en,es,de,fr,pt'.split(',') +VALID_LANGUAGES = tuple(zip( + *(VALID_LANGUAGES_LST, 'English,Español,Deutsch,Français,Português'.split(',')) +)) +VALID_LANGUAGES_DICT = dict(VALID_LANGUAGES) +LANGUAGES_SCOLDING = ''' +I guessed you were writing in "{0}" and I have the following suggestions: +Supuse que estabas escribiendo en "{0}" y tengo las siguientes sugerencias: +Ich vermute, Sie haben auf "{0}" geschrieben und ich habe die folgenden Vorschläge: +J'ai deviné que vous écriviez en "{0}" et j'ai les suggestions suivantes: +Imaginei que você estivesse escrevendo em "{0}" e tenho as seguintes sugestões:'''.splitlines()[1:] +LANGUAGES_SCOLDING_DICT = dict(tuple(zip( + *(VALID_LANGUAGES_LST, LANGUAGES_SCOLDING) +))) + + +class Group(models.Model): + chat_id = models.IntegerField( + default=0, + blank=False, + null=False, + unique=True + ) + name = models.CharField( + default="", + blank=False, + null=False, + max_length=255 + ) + + def __str__(self): + return f'{self.pk} - {self.name}' + + class Meta: + indexes = [ + models.Index(fields=['id']), + models.Index(fields=['chat_id']), + ] + + +class 
GroupDictionaryEntry(models.Model): + group = models.ForeignKey( + Group, + on_delete=models.CASCADE, + related_name='words' + ) + word = models.TextField( + default="", + blank=False, + null=False + ) + + def __str__(self): + return f'{self.pk} - {self.word}' + + class Meta: + indexes = [ + models.Index(fields=['id']), + models.Index(fields=['group_id']), + ] diff --git a/propergrammar/mwt.py b/propergrammar/mwt.py new file mode 100644 index 0000000..8e52c8c --- /dev/null +++ b/propergrammar/mwt.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python3 +# -*- encoding: utf-8 -*- + +import time + + +class MWT(object): + """Memoize With Timeout""" + _caches = {} + _timeouts = {} + + def __init__(self, timeout=2): + self.timeout = timeout + + def collect(self): + """Clear cache of results which have timed out""" + for func in self._caches: + cache = {} + for key in self._caches[func]: + if (time.time() - self._caches[func][key][1]) < self._timeouts[func]: + cache[key] = self._caches[func][key] + self._caches[func] = cache + + def __call__(self, f): + self.cache = self._caches[f] = {} + self._timeouts[f] = self.timeout + + def func(*args, **kwargs): + kw = sorted(kwargs.items()) + key = (args, tuple(kw)) + try: + v = self.cache[key] + # print("cache") + if (time.time() - v[1]) > self.timeout: + raise KeyError + except KeyError: + # print("new") + v = self.cache[key] = f(*args, **kwargs), time.time() + return v[0] + func.func_name = f.__name__ + + return func diff --git a/propergrammar/settings.py b/propergrammar/settings.py new file mode 100644 index 0000000..96a927a --- /dev/null +++ b/propergrammar/settings.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python3 +# -*- encoding: utf-8 -*- + +SECRET_KEY = "plz, stop complaining" + +INSTALLED_APPS = [ + 'propergrammar' +] + +DATABASES = { + 'default': { + 'ENGINE': 'django.db.backends.sqlite3', + 'NAME': 'db.sqlite3', + } +} + +USE_TZ = True + +TIME_ZONE = "UTC" diff --git a/propergrammar/telebot.py b/propergrammar/telebot.py new file mode 
100644 index 0000000..7454d7d --- /dev/null +++ b/propergrammar/telebot.py @@ -0,0 +1,308 @@ +#!/usr/bin/env python3 +# -*- encoding: utf-8 -*- + +from .mwt import MWT +import sys +import telegram +import telegram.ext +from telegram.ext import Updater +from telegram.ext import CommandHandler +from telegram.ext import MessageHandler +from telegram.ext.filters import Filters +from spellchecker import SpellChecker +import logging +from django.utils import timezone +import random +import datetime +from io import BytesIO, StringIO +from telegram.constants import MAX_MESSAGE_LENGTH + +CHECKERS = dict() +Group: 'propergrammar.models.Group' = None +GroupDictionaryEntry: 'propergrammar.models.GroupDictionaryEntry' = None +logger = None +models = None + + +@MWT(timeout=60*5) +def get_admin_ids(bot, chat_id): + """Returns a list of admin IDs for a given chat. Results are cached for 5 minutes.""" + return [admin.user.id for admin in bot.get_chat_administrators(chat_id)] + + +def start(update: telegram.Update, context: telegram.ext.CallbackContext): + chattype = update.message.chat.type + if chattype in ['private']: + context.bot.send_message( + chat_id=update.message.chat_id, + text="Invite me to your server!" 
+ ) + elif chattype in ['group', 'supergroup']: + admins = get_admin_ids(context.bot, update.message.chat_id) + if update.message.from_user.id in admins: + is_group_admin(update, context) + send_help_message(update, context) + + +def send_help_message(update: telegram.Update, context: telegram.ext.CallbackContext): + context.bot.send_message( + chat_id=update.message.chat_id, + text='''Try using my services with the commands: +/add_group_dictionary +/show_group_dictionary +/remove_group_dictionary +/export_group_dictionary +(there is a clear_group_dictionary, but you'll have to type it yourself)''' + ) + + +def split_long_messages(message: str) -> list: + if len(message) <= MAX_MESSAGE_LENGTH: + return [message] + else: + lst = [] + lines = message.splitlines() + size = 0 + buffer = '' + for line in lines: + if len(buffer) + len(line) + 1 <= MAX_MESSAGE_LENGTH: + buffer += line + '\n' + else: + lst.append(buffer) + buffer = '' + if len(line) + 1 <= MAX_MESSAGE_LENGTH: + buffer += line + '\n' + else: + remainder = line + while len(remainder) + 1 >= MAX_MESSAGE_LENGTH: + humongous, remainder = ( + remainder[:MAX_MESSAGE_LENGTH], remainder[MAX_MESSAGE_LENGTH:]) + lst.append(humongous) + lst.append(remainder) + buffer = '' + if len(buffer) > 0: + lst.append(buffer) + return lst + + +def get_group(chat): + group = Group.objects.filter(chat_id=chat.id).first() + title = chat.title + if title is None: + title = f'{chat.first_name} {chat.last_name}' + if group is None: + group = Group(chat_id=chat.id, name=title) + group.save() + elif group.name != title: + group.name = title + group.save() + return group + + +def cmd_null(update: telegram.Update, context: telegram.ext.CallbackContext): + return + + +def remove_links(text, entities): + if isinstance(text, str): + text = list(text) + for entity in entities: + if entity.type in ['url', 'email', 'bot_command']: + for i in range(entity.offset, entity.offset+entity.length): + text[i] = '' + return ''.join(text) + else: + return 
text + + +def handle_message(update: telegram.Update, context: telegram.ext.CallbackContext): + chattype = update.message.chat.type + if chattype in ['group', 'supergroup', 'private']: + to_check = ' '.join( + list(filter(lambda x: x is not None, [ + remove_links(update.message.text, update.message.entities), + remove_links(update.message.caption, + update.message.caption_entities) + ])) + ).strip() + if len(to_check) > 0: + group = get_group(update.message.chat) + languages = models.VALID_LANGUAGES_LST + ignored = [x.word for x in group.words.all()] + language_rank = list() + for language in languages: + chk = CHECKERS[language] + words = chk.split_words(to_check) + if len(words) > 0: + language_rank.append( + (len(chk.known(words))/len(words), language) + ) + language_rank.sort() + language_rank.reverse() + language_confidence, language_main = language_rank[0] + if language_confidence == 0: + language_main = 'en' + checker_main = CHECKERS[language_main] + words = checker_main.split_words(to_check) + unknown = checker_main.unknown(words) + for lng in languages: + unknown = CHECKERS[lng].unknown(unknown) + unknown = set(unknown) + for unknown_word in list(unknown): + for ignored_word in ignored: + if unknown_word.lower() == ignored_word.lower(): + unknown.difference_update(unknown_word) + unknown = sorted(list(unknown)) + formatted_suggestions = [] + for typo in unknown: + typo_fixed = checker_main.correction(typo) + typo_fixes = checker_main.candidates(typo) + typo_fixes -= {typo_fixed} + formatted_suggestions.append( + f'{typo} → {typo_fixed}; {", ".join(typo_fixes)}' + ) + if len(unknown) > 0: + lecture = models.LANGUAGES_SCOLDING_DICT[language_main].format( + models.VALID_LANGUAGES_DICT[language_main] + )+'\n'+"\n".join(formatted_suggestions) + for message_segments in split_long_messages(lecture): + context.bot.send_message( + chat_id=update.message.chat_id, + reply_to_message_id=update.message.message_id, + text=message_segments + ) + + +def cmd_agd(update: 
telegram.Update, context: telegram.ext.CallbackContext): + chattype = update.message.chat.type + if chattype in ['group', 'supergroup']: + admins = get_admin_ids(context.bot, update.message.chat_id) + if update.message.from_user.id in admins: + group = get_group(update.message.chat) + wordlist = sorted( + list(set([w.word.lower() for w in group.words.all()]))) + new_words = ' '.join(remove_links( + update.message.text, update.message.entities).splitlines()).split() + status = f'Adding {len(new_words)} new words...\n' + for nword in new_words: + status += f'{nword}... ' + if nword.lower() in wordlist: + status += 'already on list\n' + else: + GroupDictionaryEntry(group=group, word=nword).save() + wordlist.append(nword.lower()) + status += 'OK\n' + status += f'New word count: {len(group.words.all())}' + for segment in split_long_messages(status): + context.bot.send_message( + chat_id=update.message.chat_id, + reply_to_message_id=update.message.message_id, + text=segment + ) + + +def cmd_rgd(update: telegram.Update, context: telegram.ext.CallbackContext): + chattype = update.message.chat.type + if chattype in ['group', 'supergroup']: + admins = get_admin_ids(context.bot, update.message.chat_id) + if update.message.from_user.id in admins: + group = get_group(update.message.chat) + old_words = ' '.join(remove_links( + update.message.text, update.message.entities).splitlines()).split() + status = f'Removing {len(old_words)} words...\n' + for oword in old_words: + status += f'{oword}... 
' + entry = group.words.all().filter(word__iexact=oword).first() + if entry is None: + status += 'not found\n' + else: + entry.delete() + status += 'REMOVED\n' + status += f'New word count: {len(group.words.all())}' + for segment in split_long_messages(status): + context.bot.send_message( + chat_id=update.message.chat_id, + reply_to_message_id=update.message.message_id, + text=segment + ) + + +def cmd_sgd(update: telegram.Update, context: telegram.ext.CallbackContext): + chattype = update.message.chat.type + if chattype in ['group', 'supergroup']: + admins = get_admin_ids(context.bot, update.message.chat_id) + if update.message.from_user.id in admins: + group = get_group(update.message.chat) + wordlist = sorted([w.word for w in group.words.all()]) + wordlist_str = '\n'.join(wordlist) + msg = 'Here are all your %d entries you have on your group\'s dictionary:\n%s' % ( + len(wordlist), wordlist_str + ) + for fragment in split_long_messages(msg): + context.bot.send_message( + chat_id=update.message.chat_id, + reply_to_message_id=update.message.message_id, + text=fragment + ) + + +def cmd_egd(update: telegram.Update, context: telegram.ext.CallbackContext): + chattype = update.message.chat.type + if chattype in ['group', 'supergroup']: + admins = get_admin_ids(context.bot, update.message.chat_id) + if update.message.from_user.id in admins: + group = get_group(update.message.chat) + wordlist = sorted([w.word for w in group.words.all()]) + wordlist_str = '\n'.join(wordlist) + wordlist_str += '\n' + bio = BytesIO(wordlist_str.encode('UTF-8')) + bio.name = f'DictExport_{update.message.chat_id}_{str(update.message.date).replace(" ", "_").replace(":", "-")}.txt' + context.bot.send_document( + chat_id=update.message.chat_id, + reply_to_message_id=update.message.message_id, + document=bio + ) + + +def cmd_cgd(update: telegram.Update, context: telegram.ext.CallbackContext): + chattype = update.message.chat.type + if chattype in ['group', 'supergroup']: + admins = 
get_admin_ids(context.bot, update.message.chat_id) + if update.message.from_user.id in admins: + group = get_group(update.message.chat) + group.words.all().delete() + context.bot.send_message( + chat_id=update.message.chat_id, + reply_to_message_id=update.message.message_id, + text='Erased all entries from local dictionary.' + ) + + +def start_bot(token): + global Group + global GroupDictionaryEntry + global logger + global CHECKERS + global models + from .models import Group + from .models import GroupDictionaryEntry + from . import models + for lng in models.VALID_LANGUAGES_LST: + CHECKERS[lng] = SpellChecker(lng) + logging.basicConfig( + level=logging.DEBUG, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' + ) + logger = logging.getLogger() + logger.setLevel(logging.INFO) + u = Updater(token, use_context=True) + d = u.dispatcher + d.add_handler(CommandHandler('start', start)) + d.add_handler(CommandHandler('add_group_dictionary', cmd_agd)) + d.add_handler(CommandHandler('remove_group_dictionary', cmd_rgd)) + d.add_handler(CommandHandler('show_group_dictionary', cmd_sgd)) + d.add_handler(CommandHandler('export_group_dictionary', cmd_egd)) + d.add_handler(CommandHandler('clear_group_dictionary', cmd_cgd)) + d.add_handler(CommandHandler('help', send_help_message)) + d.add_handler(MessageHandler(Filters.all, handle_message)) + u.start_polling() diff --git a/requirements.frozen.txt b/requirements.frozen.txt new file mode 100644 index 0000000..0fd1a5e --- /dev/null +++ b/requirements.frozen.txt @@ -0,0 +1,14 @@ +asgiref==3.2.3 +certifi==2019.11.28 +cffi==1.14.0 +cryptography==2.8 +decorator==4.4.1 +Django==3.0.3 +future==0.18.2 +pycparser==2.19 +pyspellchecker==0.5.3 +python-telegram-bot==12.4.2 +pytz==2019.3 +six==1.14.0 +sqlparse==0.3.0 +tornado==6.0.3 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..aa88f8b --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +django +python-telegram-bot +pyspellchecker \ No 
newline at end of file diff --git a/spelling_libreoffice_colibri_cc0.svg b/spelling_libreoffice_colibri_cc0.svg new file mode 100644 index 0000000..8ab3678 --- /dev/null +++ b/spelling_libreoffice_colibri_cc0.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/spelling_libreoffice_colibri_cc0.svg.png b/spelling_libreoffice_colibri_cc0.svg.png new file mode 100644 index 0000000..2b8e3f3 Binary files /dev/null and b/spelling_libreoffice_colibri_cc0.svg.png differ diff --git a/spelling_libreoffice_colibri_cc0_48.svg b/spelling_libreoffice_colibri_cc0_48.svg new file mode 100644 index 0000000..be3e7d8 --- /dev/null +++ b/spelling_libreoffice_colibri_cc0_48.svg @@ -0,0 +1,85 @@ + + + + + + image/svg+xml + + + + + + + + + + + + + + + diff --git a/telegram-bot-use-proper-grammar.service b/telegram-bot-use-proper-grammar.service new file mode 100644 index 0000000..dff3287 --- /dev/null +++ b/telegram-bot-use-proper-grammar.service @@ -0,0 +1,13 @@ +[Unit] +Description=Use Proper Grammar Bot telegram service +After=network.target + +[Service] +User=http +Group=http +WorkingDirectory=/var/www/telegram_use_proper_grammar +ExecStart=/usr/bin/make serve +KillSignal=SIGINT + +[Install] +WantedBy=multi-user.target