#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
"""Extract link metadata from parsed Reddit listing markup and name files."""

import datetime
import dateutil.parser
from .limits import minint, maxint
from .textTools import slugify

# Errors that a chain of ``find(...)``/``[...]`` lookups or the date parser
# can raise when the expected markup is absent or malformed.  Listing them
# explicitly (instead of a bare ``except``) keeps KeyboardInterrupt and
# SystemExit from being swallowed.
_LOOKUP_ERRORS = (
    AttributeError,
    IndexError,
    KeyError,
    TypeError,
    ValueError,
    OverflowError,
)


def _parseEpoch(text):
    """Parse a date/time string and return it as whole epoch seconds.

    ``datetime.timestamp()`` is used rather than ``strftime('%s')``: the
    ``%s`` directive is a platform extension that is not available
    everywhere and does not honour the UTC offset of an aware datetime.
    """
    return int(dateutil.parser.parse(text).timestamp())


def _itemEpochOr(items, index, default):
    """Return the epoch of ``items[index]``'s ``<time>`` tag, or *default*.

    *default* is returned when the item, its ``<time>`` element or the
    ``datetime`` attribute is missing, or when the value cannot be parsed.
    """
    try:
        return _parseEpoch(items[index].find('time')['datetime'])
    except _LOOKUP_ERRORS:
        return default


def getInfoFromRedditItem(bs):
    """Collect the metadata of one listing entry into a plain dict.

    Args:
        bs: A parsed element for a single entry.  It must carry ``class``
            and ``data-fullname`` attributes and contain a title anchor
            (``a.title``), a domain span (``span.domain > a``) and a
            ``<time datetime=...>`` element.

    Returns:
        A dict with the keys ``nsfw``, ``link``, ``title``, ``flair``,
        ``sharer``, ``domain``, ``datakey`` and ``timestamp``.  ``flair``
        and ``sharer`` are ``None`` when the corresponding element is not
        present; ``timestamp`` is an int of epoch seconds.

    Raises:
        AttributeError, KeyError, TypeError: If one of the required
            elements or attributes is missing.
    """
    nsfw = 'over18' in bs['class']

    # The author element is treated as optional: assembleFileName already
    # copes with a ``None`` sharer.
    author_tag = bs.find(class_='author')
    sharer = author_tag.text.strip() if author_tag is not None else None

    # Look the title anchor up once; it provides both the text and the URL.
    title_tag = bs.find('a', class_='title')
    title = title_tag.text.strip()
    link = str(title_tag['href'])

    domain = bs.find('span', class_='domain').find('a').text.strip()
    datakey = bs['data-fullname']
    timestamp = _parseEpoch(bs.find('time')['datetime'])

    flair_tag = bs.find('span', class_='linkflairlabel')
    flair = flair_tag.text.strip() if flair_tag is not None else None

    return {
        'nsfw': nsfw,
        'link': link,
        'title': title,
        'flair': flair,
        'sharer': sharer,
        'domain': domain,
        'datakey': datakey,
        'timestamp': timestamp,
    }


def getEmptySubredditData(srname):
    """Return a fresh, link-less data record for the subreddit *srname*.

    ``date_first`` starts at ``minint`` and ``date_last`` at ``maxint``,
    the same placeholders getSubredditPageInfo uses when a page has no
    usable timestamps.
    """
    return {
        'subreddit': srname,
        'date_first': minint,
        'date_last': maxint,
        'links': [],
    }


def getSubredditPageInfo(bs):
    """Summarise one parsed listing page.

    Args:
        bs: The parsed page.  It must contain an element with
            ``id="siteTable"``.

    Returns:
        A 4-tuple ``(first, last, nextpage, structured_links)``:

        * ``first`` -- epoch seconds of the first entry on the page, or
          ``minint`` if it cannot be determined.
        * ``last`` -- epoch seconds of the last entry on the page, or
          ``maxint`` if it cannot be determined.
        * ``nextpage`` -- ``href`` of the "next" navigation button, or
          ``None`` when there is none.
        * ``structured_links`` -- one getInfoFromRedditItem dict for each
          entry whose ``class`` does not contain ``self``.
    """
    pagetable = bs.find(id='siteTable')
    discussions = pagetable.find_all(
        lambda tag: tag.has_attr('class') and 'thing' in tag['class']
    )
    links = [item for item in discussions if 'self' not in item['class']]

    first = _itemEpochOr(discussions, 0, minint)
    last = _itemEpochOr(discussions, -1, maxint)

    try:
        nextpage = (
            bs.find('div', class_='nav-buttons')
            .find(class_='nextprev')
            .find(class_='next-button')
            .find('a')['href']
        )
    except (AttributeError, KeyError, TypeError):
        # Any missing step in the chain means there is no further page.
        nextpage = None

    structured_links = [getInfoFromRedditItem(item) for item in links]
    return first, last, nextpage, structured_links


def assembleFileName(subreddit, link, seq, ext):
    """Build the file name for the *seq*-th file belonging to *link*.

    The result has the form::

        <subreddit>__<date>_<time>_<nsfw|safe>___<flair>___<sharer>___<title>___<datakey>___<seq>.<ext>

    Args:
        subreddit: Subreddit name, used verbatim as the prefix.
        link: A dict as produced by getInfoFromRedditItem.
        seq: Sequence number, zero-padded to four digits.
        ext: File extension without the leading dot.

    Returns:
        The assembled file name as a string.
    """
    # fromtimestamp() yields local time; ':' is replaced so the stamp is
    # usable in file names.
    moment = datetime.datetime.fromtimestamp(int(link['timestamp']))
    stamp = moment.isoformat().replace('T', '_').replace(':', '-')
    rating = 'nsfw' if link['nsfw'] else 'safe'

    fields = [
        subreddit + '__' + stamp + '_' + rating,
        '-' if link['flair'] is None else slugify(link['flair']),
        '-' if link['sharer'] is None else slugify(link['sharer']),
        slugify(link['title'][:50]),
        slugify(link['datakey']),
        '%04d' % seq,
    ]
    return '___'.join(fields) + '.' + ext