69 lines
2.5 KiB
Python
69 lines
2.5 KiB
Python
#!/usr/bin/env python3
|
|
# -*- encoding: utf-8 -*-
|
|
|
|
import re
|
|
import json
|
|
import filetype
|
|
from bs4 import BeautifulSoup as _BS
|
|
from ._cacheable import get_link_bytes
|
|
from ..downloadedData import DownloadedData
|
|
from ... import simpleDownloader
|
|
|
|
# Pattern for reddit post permalink paths of the form
# "/r/<subreddit>/comments/<post id>...".
# Group 1 captures the path segment after "/r/", group 2 the segment after
# "/comments/"; anything that follows is matched but not captured.
# NOTE(review): the name looks like a typo for "MATCHURL"; it is left as-is
# because other code refers to it by this name.
RGX_MATHURL = re.compile(r'\/r\/([^/]+)\/comments\/([^/]+).*')
|
|
|
|
def notnone(item):
    """Predicate: tell whether ``item`` is anything other than ``None``.

    Falsy values such as ``0``, ``''`` or ``[]`` still count as "not None".
    """
    is_missing = item is None
    return not is_missing
|
|
|
|
def BeautifulSoup(data):
    """Build a bs4 soup from ``data`` using the 'html5lib' parser.

    Thin wrapper around the imported ``_BS`` constructor so callers do not
    have to repeat the parser name.
    """
    parser_name = 'html5lib'
    return _BS(data, parser_name)
|
|
|
|
def matchRedditUploads(link):
    """Match ``link`` against the i.reddituploads.com URL shape.

    Returns the ``re.Match`` object (group 1 is the hexadecimal identifier
    that follows the host) when ``link`` starts with such a URL, otherwise
    ``None``.
    """
    uploads_pattern = r'https?://i\.reddituploads\.com/([0-9a-fA-F]+)\??'
    compiled = re.compile(uploads_pattern)
    return compiled.match(link)
|
|
|
|
def works_on(domain):
    """Report whether ``domain`` is one of the hosts this module handles."""
    handled_domains = (
        'i.redd.it',
        'i.reddituploads.com',
        'reddit.com',
        'np.reddit.com',
        'old.reddit.com',
    )
    return domain in handled_domains
|
|
|
|
class IReddIt(object):
    """Downloader for reddit post permalinks and i.reddituploads.com URLs."""

    def recognizes(self, link):
        """Return True if ``link`` starts with '/r/' or is a reddituploads URL."""
        return link.startswith('/r/') or matchRedditUploads(link) is not None

    def download(self, link):
        """Fetch the media referenced by ``link``.

        Args:
            link: Either a reddit post path/URL containing
                ``/r/<subreddit>/comments/<post id>`` or an
                ``i.reddituploads.com`` URL.

        Returns:
            A ``DownloadedData`` holding the fetched bytes (left empty when
            nothing could be fetched), or ``None`` when ``link`` is not a
            supported kind of link.
        """
        dd = DownloadedData()
        simpleDownloader.setCookies({'over18':1})
        # Reduce absolute reddit URLs to their site-relative '/r/...' path.
        if 'reddit.com/r/' in link:
            link = '/r/'+link.split('reddit.com/r/')[-1]
        if link.startswith('/user/'):
            # NOTE(review): after this rewrite the link matches neither
            # branch below, so user links end in the ``return None`` path.
            link = 'https://www.reddit.com'+link
        if link.startswith('/r/'):
            matched = RGX_MATHURL.match(link)
            if matched is None:
                # A '/r/...' path that is not a post permalink (for example a
                # bare subreddit) has no post id to query; treat it like any
                # other unsupported link instead of raising AttributeError.
                return None
            subreddit, post_id = matched.groups()
            post_key = f't3_{post_id}'
            query = '&'.join([
                'rtj=only',
                'emotes_as_images=true',
                'allow_over18=1',
                'include=identity',
                f'subredditName={subreddit}',
                'hasSortParam=false',
                'include_categories=true',
                'onOtherDiscussions=false',
            ])
            redditlink = (
                f'https://gateway.reddit.com/desktopapi/v1/postcomments/{post_key}?'
                + query
            )
            raw = simpleDownloader.getUrlBytes(redditlink)
            if raw is None:
                # Same guard the reddituploads branch applies to its fetch:
                # nothing came back, so hand over the empty container rather
                # than letting json.loads(None) raise TypeError.
                return dd
            jo = json.loads(raw)
            post = jo['posts'][post_key]
            # 'source' and 'media' may be absent or null in the post record.
            post_source_url = (post.get('source') or {}).get('url')
            post_media_content = (post.get('media') or {}).get('content')
            # Prefer the source URL; fall back to the media content.
            imgloc = (
                post_source_url
                if post_source_url is not None
                else post_media_content
            )
            if imgloc is not None:
                data = get_link_bytes(imgloc)
                if data is not None:
                    dd.put(imgloc, data, filetype.guess_extension(data))
        elif matchRedditUploads(link):
            data = simpleDownloader.getUrlBytes(link)
            if data is not None:
                dd.put(link, data, filetype.guess_extension(data))
        else:
            return None
        return dd
|
|
|
|
def get_class():
    """Expose this module's downloader class (the class itself, not an instance)."""
    downloader_class = IReddIt
    return downloader_class
|