#!/usr/bin/env python3
|
|
# -*- encoding: utf-8 -*-
|
|
|
|
import datetime
|
|
import dateutil.parser
|
|
from .limits import minint, maxint
|
|
from .textTools import slugify
|
|
|
|
# Fixed query-string parameters appended to every desktop-API gateway URL.
# Joined once at import time into a plain '&'-separated string.
GATEWAY_LINK_ARGS = '&'.join((
    "redditWebClient=web2x",
    "app=web2x-client-production",
    "allow_over18=1",
    "layout=card",
    "include=identity",
    "sort=new",
))
|
|
|
|
|
|
def build_gateway_link(sr, after=None, dist=0):
    """Assemble a gateway.reddit.com desktop-API listing URL for subreddit *sr*.

    The optional pagination parameters are appended only when meaningful:
    ``dist`` when it is positive, ``after`` when it is not None.
    """
    base = f"https://gateway.reddit.com/desktopapi/v1/subreddits/{sr}?" + GATEWAY_LINK_ARGS
    parts = [base]
    if dist > 0:
        parts.append(f'dist={dist}')
    if after is not None:
        parts.append(f'after={after}')
    return '&'.join(parts)
|
|
|
|
|
|
def getInfoFromRedditItem(bs):
    """Extract post metadata from one old-reddit HTML listing item.

    :param bs: BeautifulSoup tag for a single ``.thing`` listing entry.
    :returns: dict with keys nsfw/link/title/flair/sharer/domain/datakey/timestamp.
    """
    # Look the title anchor up once instead of once per field.
    title_anchor = bs.find('a', class_='title')

    # Self posts keep the default domain; external links carry their own.
    domain = 'reddit.com'
    domain_anchor = bs.find('span', class_='domain').find('a')
    if domain_anchor is not None:
        domain = domain_anchor.text.strip()

    # Portable epoch conversion: strftime('%s') is a glibc-only extension and
    # ignores the tz offset dateutil parsed; datetime.timestamp() honours it.
    timestamp = int(dateutil.parser.parse(bs.find('time')['datetime']).timestamp())

    # Not every post has a flair label; only swallow the missing-element case
    # (bs.find(...) returning None) instead of a bare except hiding real bugs.
    try:
        flair = bs.find('span', class_='linkflairlabel').text.strip()
    except AttributeError:
        flair = None

    return {
        'nsfw': 'over18' in bs['class'],
        'link': str(title_anchor['href']),
        'title': title_anchor.text.strip(),
        'flair': flair,
        'sharer': bs.find(class_='author').text.strip(),
        'domain': domain,
        'datakey': bs['data-fullname'],
        'timestamp': timestamp,
    }
|
|
|
|
|
|
def getInfoFromRedditJsonItem(jo):
    """Normalize one post object from the gateway JSON API into a link dict."""
    # Prefer the original source URL; fall back to the hosted media content.
    if ('source' in jo) and (jo['source'] is not None):
        link = jo['source']['url']
    else:
        link = jo['media']['content']

    # First text-type flair entry, or None when there is none.
    flair = None
    for entry in jo['flair']:
        if entry['type'] == 'text':
            flair = entry['text']
            break

    return {
        'nsfw': jo['isNSFW'],
        'link': link,
        'title': jo['title'],
        'flair': flair,
        'sharer': jo['author'],
        'domain': jo['domain'],
        'datakey': jo['id'],
        'timestamp': jo['created'] // 1000,
    }
|
|
|
|
|
|
def getEmptySubredditData(srname):
    """Return a fresh persistence record for a subreddit with no links yet."""
    # date_first/date_last start at the extreme sentinels so the first real
    # page of results narrows them.
    return dict(
        subreddit=srname,
        date_first=minint,
        date_last=maxint,
        links=[],
    )
|
|
|
|
|
|
def getSubredditPageJsonInfo(jo, subreddit, pageno):
    """Digest one gateway JSON page into ``(first_ts, last_ts, next_link, links)``.

    :param jo: decoded gateway response (keys: postIds, posts, token, dist).
    :param subreddit: subreddit name, used to build the next-page link.
    :param pageno: page number, used for the ``dist`` pagination offset.
    :returns: tuple of (timestamp of first kept link, timestamp of last kept
              link, next-page URL or None, list of structured link dicts).
    """
    structured_links = []
    for postId in jo['postIds']:
        post = jo['posts'][postId]
        # A post is usable when it has a downloadable URL, a domain, and a
        # sane-looking id (promoted/garbage entries carry oversized ids).
        has_source_url = ('source' in post and post['source'] is not None
                          and 'url' in post['source'])
        has_media = ('media' in post and post['media'] is not None
                     and 'content' in post['media'])
        has_domain = ('domain' in post) and (post['domain'] is not None)
        sane_id = ('id' in post and isinstance(post['id'], str)
                   and len(post['id']) < 20)
        if (has_source_url or has_media) and has_domain and sane_id:
            structured_links.append(getInfoFromRedditJsonItem(post))

    # No usable posts: either postIds was empty or the filter rejected every
    # entry. The original only guarded the first case and raised IndexError
    # on structured_links[0] in the second; return the sentinels for both.
    if not structured_links:
        return maxint, minint, None, []

    next_link = None
    if jo['token'] is not None:
        next_link = build_gateway_link(subreddit, jo['token'], jo['dist'] * pageno + 1)

    return (
        structured_links[0]['timestamp'],
        structured_links[-1]['timestamp'],
        next_link,
        structured_links,
    )
|
|
|
|
|
|
def getSubredditPageInfo(bs):
    """Digest one old-reddit HTML listing page into ``(first, last, next, links)``.

    :param bs: BeautifulSoup document for the listing page.
    :returns: tuple of (timestamp of first item, timestamp of last item,
              next-page URL or None, structured dicts for non-self posts).
    """
    pagetable = bs.find(id='siteTable')
    discussions = pagetable.find_all(
        lambda tag: tag.has_attr('class') and 'thing' in tag['class']
    )
    # Self posts have no external link to download; keep only link posts.
    links = [d for d in discussions if 'self' not in d['class']]

    first = minint
    last = maxint
    # Pages can be empty or miss <time> elements; fall back to the sentinels.
    # The narrowed exception tuples replace bare excepts that hid real bugs;
    # timestamp() replaces the non-portable glibc-only strftime('%s').
    try:
        first = int(dateutil.parser.parse(discussions[0].find('time')['datetime']).timestamp())
    except (IndexError, AttributeError, TypeError, KeyError, ValueError):
        pass
    try:
        last = int(dateutil.parser.parse(discussions[-1].find('time')['datetime']).timestamp())
    except (IndexError, AttributeError, TypeError, KeyError, ValueError):
        pass

    # Any missing element along the next-button chain means this is the last
    # page; leave nextpage as None in that case.
    nextpage = None
    try:
        nextpage = bs.find('div', class_='nav-buttons').find(class_='nextprev').find(class_='next-button').find('a')['href']
    except (AttributeError, TypeError, KeyError):
        pass

    structured_links = list(map(getInfoFromRedditItem, links))
    return first, last, nextpage, structured_links
|
|
|
|
|
|
def assembleFileName(subreddit, link, seq, ext):
    """Build the deterministic on-disk file name for a downloaded link.

    Layout:
    ``subreddit__date_nsfwflag___flair___sharer___title___datakey___seq.ext``
    where flair/sharer fall back to '-' when absent.
    """
    # Filesystem-safe local datetime: 'T' -> '_' and ':' -> '-'.
    when = datetime.datetime.fromtimestamp(int(link['timestamp']))
    stamp = when.isoformat().replace('T', '_').replace(':', '-')
    parts = (
        subreddit,
        '__',
        stamp,
        '_',
        'nsfw' if link['nsfw'] else 'safe',
        '___',
        '-' if link['flair'] is None else slugify(link['flair']),
        '___',
        '-' if link['sharer'] is None else slugify(link['sharer']),
        '___',
        slugify(link['title'][:50]),
        '___',
        slugify(link['datakey']),
        '___',
        '%04d' % seq,
        '.' + ext,
    )
    return ''.join(parts)
|