rss2discordwh/rss2discordwh/__init__.py

119 lines
3.6 KiB
Python

#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
import sys
import time
import json
from pathlib import Path
from .simpleDownloader import getUrlBytes
from .simpleDownloader import cleanCookies
from .htmlStripper import strip_tags
import xml.etree.ElementTree
from urllib.request import Request
from urllib.request import urlopen
def make_brief(s, prevsz=200):
    """Return a preview of ``s``.

    Strings of up to ``prevsz`` characters are returned unchanged; longer
    ones are cut to their first ``prevsz`` characters and ``'...'`` is
    appended to mark the cut.
    """
    return s if len(s) <= prevsz else s[:prevsz] + '...'
def webhooksend(url, msg):
    """POST ``msg``, serialized as JSON, to the webhook at ``url``.

    Returns whatever ``urlopen`` returns for the request; closing that
    response object is left to the caller.
    """
    headers = {
        'Content-Type': 'application/json',
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0',
    }
    payload = json.dumps(msg).encode()
    request = Request(url, data=payload, headers=headers, method='POST')
    return urlopen(request)
def read_conf(path):
    """Load the list of watched feeds from ``<path>/watched_rss.d``.

    Every entry of that directory is read as a text file whose first line
    is the RSS URL and whose second line is the webhook URL (both stripped
    of surrounding whitespace); any further lines are ignored. Entries are
    processed in sorted path order.

    Reading is best-effort: an entry that cannot be read or has fewer than
    two lines is reported on stderr and skipped.

    Returns:
        A list of ``{'rsslink': ..., 'webhook': ...}`` dicts.
    """
    cfg = []
    for entry in sorted(Path(path, 'watched_rss.d').glob('*')):
        try:
            with entry.open() as f:
                link, dwh, *_ = map(str.strip, f.read().splitlines())
        except (OSError, ValueError) as e:
            # OSError: unreadable entry (e.g. a directory, bad permissions).
            # ValueError: fewer than two lines, or undecodable bytes.
            # Deliberately NOT BaseException, so Ctrl-C / SystemExit still
            # stop the program instead of being printed and ignored.
            print('%s: %s' % (entry, e), file=sys.stderr)
            continue
        cfg.append({
            'rsslink': link,
            'webhook': dwh,
        })
    return cfg
def read_state(path):
    """Load the persisted state from ``<path>/state.json``.

    Returns the decoded JSON content, or an empty dict when the file does
    not exist yet.
    """
    statefile = Path(path, 'state.json')
    if statefile.exists():
        with statefile.open() as handle:
            return json.load(handle)
    return {}
def write_state(path, state):
    """Persist ``state`` as pretty-printed JSON in ``<path>/state.json``.

    Returns:
        The number of characters written (the result of ``f.write``).

    Raises:
        TypeError: if ``state`` is not JSON-serializable; the existing
            state file is left untouched in that case.
    """
    # Serialize BEFORE opening the file: opening with 'w' truncates it, so
    # a failure inside json.dumps would otherwise wipe the saved state.
    serialized = json.dumps(state, indent=4, sort_keys=True)
    with Path(path, 'state.json').open('w') as f:
        return f.write(serialized)
def _item_to_webhook_message(item):
    """Build the webhook payload (one embed) for a single RSS ``<item>`` element."""
    return {
        'embeds': [{
            'title': strip_tags(item.find('title').text),
            'description': make_brief(strip_tags(item.find('description').text)),
            'url': item.find('link').text,
            'footer': {'text': item.find('pubDate').text},
        }],
    }


def fetch_updates(conf, state, max_items=3):
    """Fetch every configured feed and post its unseen items to its webhook.

    For each feed, at most the first ``max_items`` ``<item>`` elements of
    the ``<channel>`` are examined in document order. Examination stops at
    the item whose link equals the link stored in ``state`` for that feed;
    the items before it are posted in reverse order, so the last examined
    item is sent first. The feed's stored link is then the link of the
    first item. (This assumes feeds list their newest item first.)

    Args:
        conf: list of ``{'rsslink': ..., 'webhook': ...}`` dicts.
        state: dict mapping an RSS URL to the link of the last item handled
            for that feed. It is not modified.
        max_items: upper bound on the number of items posted per feed and
            per call.

    Returns:
        An updated copy of ``state``.

    Raises:
        Whatever the XML parser or the webhook request raises, and
        AttributeError if the document has no ``<channel>`` or an item
        lacks one of ``title``/``link``/``pubDate``/``description``.
    """
    state = state.copy()
    for newsfeed in conf:
        # NOTE(review): presumably resets the downloader's cookies between
        # feeds -- confirm in simpleDownloader.
        cleanCookies()
        rsslink, webhook = newsfeed['rsslink'], newsfeed['webhook']
        rsscontent = getUrlBytes(rsslink)
        if rsscontent is None:
            # Nothing was downloaded for this feed; move on to the next one.
            continue
        rsscontent = rsscontent.decode('utf-8', 'ignore')
        # NOTE: the feed is untrusted input and xml.etree.ElementTree is not
        # hardened against maliciously constructed XML (see the Python docs
        # on XML vulnerabilities).
        feed = xml.etree.ElementTree.fromstring(rsscontent)
        items = feed.find('channel').findall('item')

        # Remember the previous marker BEFORE updating the state. Comparing
        # later items against the freshly stored link (as was done before)
        # never matched, so already-posted items were sent again each time
        # a single new item appeared.
        last_seen = state.get(rsslink)
        pending = []
        for position, item in enumerate(items[:max_items]):
            link = item.find('link').text
            if link == last_seen:
                break
            if position == 0:
                state[rsslink] = link
            pending.append(_item_to_webhook_message(item))

        # NOTE(review): the original layout of this call was lost with the
        # file's indentation; it is placed once per feed, before sending.
        cleanCookies()
        for message in reversed(pending):
            # Close the HTTP response so the connection is not leaked.
            webhooksend(webhook, message).close()
    return state
def main():
    """Run the polling loop forever.

    The configuration directory is taken from ``sys.argv[1]``. Every 30
    seconds the configuration and the saved state are re-read, updates are
    fetched and posted, and the new state is written back.

    Raises:
        Exception: carrying a usage message when no directory was given.
    """
    if len(sys.argv) <= 1:
        # Name the package after the directory that holds the entry script.
        # Going through Path copes with a bare script name and with other
        # path separators, where ``rsplit('/', 2)[-2]`` raised IndexError
        # instead of showing the usage message.
        script = sys.argv[0] if sys.argv else ''
        package = Path(script).absolute().parent.name
        raise Exception("Usage: python3 -m %s <conf_dir>" % package)
    confpath = Path(sys.argv[1])
    while True:
        conf = read_conf(confpath)
        state = read_state(confpath)
        newstate = fetch_updates(conf, state)
        write_state(confpath, newstate)
        time.sleep(30)