77 lines
2.5 KiB
Python
77 lines
2.5 KiB
Python
#!/usr/bin/env python3
|
|
# -*- encoding: utf-8 -*-
|
|
|
|
import datetime
|
|
import dateutil.parser
|
|
from .limits import minint, maxint
|
|
from .textTools import slugify
|
|
|
|
def getInfoFromRedditItem(bs):
    """Collect the metadata of one listing entry into a plain dict.

    ``bs`` must support attribute subscripting (``bs['class']``) and
    ``find(...)`` lookups -- presumably a BeautifulSoup ``Tag`` for a single
    link entry; confirm against the caller.

    Returns a dict with the keys ``nsfw``, ``link``, ``title``, ``flair``,
    ``sharer``, ``domain``, ``datakey`` and ``timestamp``.  ``flair`` is
    ``None`` when the entry has no ``span.linkflairlabel`` element;
    ``timestamp`` is an integer count of seconds since the Unix epoch.

    Apart from the flair, none of the lookups is guarded: if one of them
    fails, the resulting exception propagates to the caller.
    """
    nsfw = 'over18' in bs['class']
    sharer = bs.find(class_='author').text.strip()

    # The title anchor carries both the visible title and the target URL.
    title_anchor = bs.find('a', class_='title')
    title = title_anchor.text.strip()
    link = str(title_anchor['href'])

    domain = bs.find('span', class_='domain').find('a').text.strip()
    datakey = bs['data-fullname']

    # timestamp() honours the UTC offset parsed from the attribute, whereas
    # strftime('%s') is a platform-specific directive that ignores tzinfo.
    posted = dateutil.parser.parse(bs.find('time')['datetime'])
    timestamp = int(posted.timestamp())

    # The flair label is optional, so its absence is not an error.
    flair_tag = bs.find('span', class_='linkflairlabel')
    flair = None if flair_tag is None else flair_tag.text.strip()

    return {
        'nsfw': nsfw,
        'link': link,
        'title': title,
        'flair': flair,
        'sharer': sharer,
        'domain': domain,
        'datakey': datakey,
        'timestamp': timestamp,
    }
|
|
|
|
def getEmptySubredditData(srname):
    """Build the starting record for a subreddit that has no links yet.

    The record stores ``srname`` under ``subreddit``, the module's ``minint``
    under ``date_first``, ``maxint`` under ``date_last`` and a fresh empty
    list under ``links``.  Every call returns a new dict with its own list.
    """
    blank = dict(
        subreddit=srname,
        date_first=minint,
        date_last=maxint,
        links=[],
    )
    return blank
|
|
|
|
def _listingItemTimestamp(item):
    """Return the epoch seconds from the first ``<time datetime=...>`` in ``item``."""
    # timestamp() honours the UTC offset parsed from the attribute, whereas
    # strftime('%s') is a platform-specific directive that ignores tzinfo.
    return int(dateutil.parser.parse(item.find('time')['datetime']).timestamp())


def getSubredditPageInfo(bs):
    """Summarise one parsed subreddit listing page.

    ``bs`` must support ``find(...)`` lookups -- presumably the
    BeautifulSoup document of the page; confirm against the caller.

    Returns a 4-tuple ``(first, last, nextpage, structured_links)``:

    * ``first`` -- epoch seconds of the first ``thing`` element inside
      ``#siteTable``, or ``minint`` when it cannot be read.
    * ``last`` -- epoch seconds of the last ``thing`` element, or ``maxint``
      when it cannot be read.
    * ``nextpage`` -- ``href`` of the anchor under
      ``div.nav-buttons .nextprev .next-button``, or ``None`` if any part of
      that chain is missing.
    * ``structured_links`` -- ``getInfoFromRedditItem`` applied to every
      ``thing`` element whose classes do not include ``self``.

    ``first`` and ``last`` are taken from all ``thing`` elements, including
    those excluded from ``structured_links``.  A page without a
    ``#siteTable`` element is not handled and raises.
    """
    pagetable = bs.find(id='siteTable')
    discussions = pagetable.find_all(
        lambda tag: tag.has_attr('class') and 'thing' in tag['class']
    )
    links = [item for item in discussions if 'self' not in item['class']]

    # Failures expected from an empty listing, a missing <time> element or
    # attribute, or an unparsable date; anything else is a real bug.
    unreadable = (IndexError, KeyError, TypeError, ValueError, OverflowError)

    first = minint
    try:
        first = _listingItemTimestamp(discussions[0])
    except unreadable:
        pass  # best effort: keep the default

    last = maxint
    try:
        last = _listingItemTimestamp(discussions[-1])
    except unreadable:
        pass  # best effort: keep the default

    nextpage = None
    try:
        nextpage = (
            bs.find('div', class_='nav-buttons')
            .find(class_='nextprev')
            .find(class_='next-button')
            .find('a')['href']
        )
    except (AttributeError, KeyError, TypeError):
        pass  # no next-page link available

    structured_links = [getInfoFromRedditItem(item) for item in links]
    return first, last, nextpage, structured_links
|
|
|
|
def assembleFileName(subreddit, link, seq, ext):
    """Compose the file name for one downloaded item of a link.

    The name has the layout::

        <subreddit>__<date>_<time>_<nsfw|safe>___<flair>___<sharer>___<title>___<datakey>___<seq>.<ext>

    * ``<date>_<time>`` is ``link['timestamp']`` passed through
      ``datetime.fromtimestamp`` and rendered in ISO format, with ``T``
      replaced by ``_`` and ``:`` by ``-``.
    * ``<flair>`` and ``<sharer>`` are slugified, or ``-`` when ``None``.
    * ``<title>`` is the first 50 characters of the title, slugified.
    * ``<seq>`` is zero-padded to at least four digits.
    """
    separator = '___'

    moment = datetime.datetime.fromtimestamp(int(link['timestamp']))
    stamp = moment.isoformat().replace('T', '_').replace(':', '-')

    if link['nsfw']:
        rating = 'nsfw'
    else:
        rating = 'safe'

    if link['flair'] is None:
        flair_part = '-'
    else:
        flair_part = slugify(link['flair'])

    if link['sharer'] is None:
        sharer_part = '-'
    else:
        sharer_part = slugify(link['sharer'])

    title_part = slugify(link['title'][:50])
    key_part = slugify(link['datakey'])
    counter = '%04d' % seq

    prefix = subreddit + '__' + stamp + '_' + rating
    stem = separator.join(
        (prefix, flair_part, sharer_part, title_part, key_part, counter)
    )
    return stem + '.' + ext
|