reddit-image-wall-getter/reddit_imgs/system/subredditTools.py

#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
import datetime
import dateutil.parser
from .limits import minint, maxint
from .textTools import slugify
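
# Query-string parameters sent with every desktopapi gateway listing request.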
GATEWAY_LINK_ARGS = '&'.join([
"redditWebClient=web2x",
"app=web2x-client-production",
"allow_over18=1",
"layout=card",
"include=identity",
"sort=new"
])
def build_gateway_link(sr, after=None, dist=0):
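    """Build a desktopapi gateway listing URL for subreddit ``sr``.

    ``after`` (a continuation token) and ``dist`` are only appended to the
    query string when given, so the bare call yields the first page.
    """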
d = [] if dist <= 0 else [f'dist={dist}']
a = [] if after is None else [f'after={after}']
return '&'.join([
f"https://gateway.reddit.com/desktopapi/v1/subreddits/{sr}?"+GATEWAY_LINK_ARGS,
*d, *a
])
def getInfoFromRedditItem(bs):
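    """Extract this project's post fields from one reddit HTML listing entry (a BeautifulSoup tag)."""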
    nsfw = 'over18' in bs['class']
    sharer = bs.find(class_='author').text.strip()
    title_anchor = bs.find('a', class_='title')
    title = title_anchor.text.strip()
    link = str(title_anchor['href'])
    domain = 'reddit.com'
    domain_anchor = bs.find('span', class_='domain').find('a')
    if domain_anchor is not None:
        domain = domain_anchor.text.strip()
    datakey = bs['data-fullname']
    # datetime.timestamp() is portable; strftime('%s') is a platform-specific extension.
    timestamp = int(dateutil.parser.parse(bs.find('time')['datetime']).timestamp())
    # Link flair is optional; keep None when the post has none.
    flair = None
    flair_el = bs.find('span', class_='linkflairlabel')
    if flair_el is not None:
        flair = flair_el.text.strip()
return {
'nsfw': nsfw,
'link': link,
'title': title,
'flair': flair,
'sharer': sharer,
'domain': domain,
'datakey': datakey,
'timestamp': timestamp,
}
def getInfoFromRedditJsonItem(jo):
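    """Extract the same post fields as getInfoFromRedditItem, but from a gateway API post object."""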
    return {
        'nsfw': jo['isNSFW'],
        'link': jo['source']['url'] if ('source' in jo and jo['source'] is not None) else jo['media']['content'],
        'title': jo['title'],
        'flair': next((f['text'] for f in jo['flair'] if f['type'] == 'text'), None),
        'sharer': jo['author'],
        'domain': jo['domain'],
        'datakey': jo['id'],
        'timestamp': jo['created']//1000,  # the gateway API reports milliseconds
    }
def getEmptySubredditData(srname):
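    """Return a fresh subreddit record: sentinel dates and an empty link list."""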
return {
'subreddit': srname,
'date_first': minint,
'date_last': maxint,
'links': list()
}
def getSubredditPageJsonInfo(jo, subreddit, pageno):
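    """Digest one gateway listing page into
    ``(first_timestamp, last_timestamp, next_page_link, structured_links)``;
    ``next_page_link`` is None when the page carries no continuation token.
    """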
structured_links = list()
if len(jo['postIds']) <= 0:
return maxint, minint, None, list()
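    # Keep only posts that expose a usable media URL, a domain and a sane-looking id.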
for postId in jo['postIds']:
post = jo['posts'][postId]
if ((
(('source' in post) and (post['source'] is not None) and ('url' in post['source']))
or
(('media' in post) and (post['media'] is not None) and ('content' in post['media']))
) and (
('domain' in post) and (post['domain'] is not None)
) and (
('id' in post) and (isinstance(post['id'], str)) and (len(post['id']) < 20)
)):
structured_links.append(getInfoFromRedditJsonItem(post))
# tss = [sl['timestamp'] for sl in structured_links]
    if len(structured_links) == 0:
        # Every post on the page was filtered out; treat it like an empty page.
        return maxint, minint, None, list()
    return (
        structured_links[0]['timestamp'],
        structured_links[-1]['timestamp'],
        None if jo['token'] is None else build_gateway_link(subreddit, jo['token'], jo['dist']*pageno+1),
        structured_links
    )
def getSubredditPageInfo(bs):
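    """Digest one reddit HTML listing page; same return shape as getSubredditPageJsonInfo."""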
pagetable = bs.find(id='siteTable')
discussions = pagetable.find_all(
lambda a: a.has_attr('class') and
'thing' in a['class']
)
links = list(filter(lambda a: 'self' not in a['class'], discussions))
    first = minint
    last = maxint
    # Missing posts or <time> tags are expected on some pages; keep the sentinels then.
    try:
        first = int(dateutil.parser.parse(discussions[0].find('time')['datetime']).timestamp())
    except Exception:
        pass
    try:
        last = int(dateutil.parser.parse(discussions[-1].find('time')['datetime']).timestamp())
    except Exception:
        pass
    nextpage = None
    # The "next" button is absent on the last listing page.
    try:
        nextpage = bs.find('div', class_='nav-buttons').find(class_='nextprev').find(class_='next-button').find('a')['href']
    except Exception:
        pass
structured_links = list(map(getInfoFromRedditItem, links))
return first, last, nextpage, structured_links
def assembleFileName(subreddit, link, seq, ext):
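    """Compose the image file name:
    <subreddit>__<date>_<rating>___<flair>___<sharer>___<title>___<datakey>___<seq>.<ext>
    """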
    timestamp = datetime.datetime.fromtimestamp(int(link['timestamp'])).isoformat().replace('T', '_').replace(':', '-')
    rating = 'nsfw' if link['nsfw'] else 'safe'
    flair = '-' if link['flair'] is None else slugify(link['flair'])
    sharer = '-' if link['sharer'] is None else slugify(link['sharer'])
    title = slugify(link['title'][:50])
    datakey = slugify(link['datakey'])
    return f"{subreddit}__{timestamp}_{rating}___{flair}___{sharer}___{title}___{datakey}___{seq:04d}.{ext}"
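

if __name__ == '__main__':
    # Minimal usage sketch (illustrative only, not part of the original pipeline).
    # Assumes `requests` is installed and that the gateway JSON body can be fed
    # straight to getSubredditPageJsonInfo, as that function's key names suggest.
    import requests

    data = getEmptySubredditData('pics')  # 'pics' is just an example subreddit
    response = requests.get(
        build_gateway_link(data['subreddit']),
        headers={'User-Agent': 'subredditTools-example'},
    )
    first_ts, last_ts, next_url, links = getSubredditPageJsonInfo(
        response.json(), data['subreddit'], 0)
    print('page spans', first_ts, 'to', last_ts, '- next page:', next_url)
    for seq, link in enumerate(links):
        print(assembleFileName(data['subreddit'], link, seq, 'jpg'))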