#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
"""Downloader plugin for reddit-hosted images.

Handles direct ``i.reddituploads.com`` links and ``/r/<sub>/comments/<id>``
post links, resolving the latter through reddit's desktop gateway API.
"""
import re
import json

import filetype
from bs4 import BeautifulSoup as _BS

from ._cacheable import get_link_bytes
from ..downloadedData import DownloadedData
from ... import simpleDownloader

# Extracts (subreddit, post_id) from "/r/<subreddit>/comments/<post_id>...".
RGX_MATHURL = re.compile(r'\/r\/([^/]+)\/comments\/([^/]+).*')

# Direct i.reddituploads.com image link (hex id, optional query string).
# Compiled once at module level instead of on every matchRedditUploads() call.
RGX_REDDITUPLOADS = re.compile(r'https?://i\.reddituploads\.com/([0-9a-fA-F]+)\??')


def notnone(item):
    """Predicate for ``filter``: True when *item* is not ``None``."""
    return item is not None


def BeautifulSoup(data):
    """Parse *data* with BeautifulSoup using the html5lib backend."""
    return _BS(data, 'html5lib')


def matchRedditUploads(link):
    """Return a regex match object if *link* is a direct i.reddituploads.com
    URL, else ``None``."""
    return RGX_REDDITUPLOADS.match(link)


def works_on(domain):
    """True when this plugin can handle URLs hosted on *domain*."""
    return domain in ['i.redd.it', 'i.reddituploads.com', 'reddit.com',
                      'np.reddit.com', 'old.reddit.com']


class IReddIt(object):
    """Downloader for reddit post links and reddit-hosted image uploads."""

    def recognizes(self, link):
        """True when *link* looks like a reddit post path or a direct
        i.reddituploads.com URL."""
        return link.startswith('/r/') or matchRedditUploads(link) is not None

    def download(self, link):
        """Fetch the image behind *link*.

        Returns a :class:`DownloadedData` (possibly empty when the post has
        no resolvable image), or ``None`` when the link is not handled.
        """
        dd = DownloadedData()
        # Opt in to over-18 content so gated posts resolve.
        simpleDownloader.setCookies({'over18': 1})
        # Normalize full reddit.com URLs down to a bare "/r/..." path.
        if 'reddit.com/r/' in link:
            link = '/r/' + link.split('reddit.com/r/')[-1]
        if link.startswith('/user/'):
            link = 'https://www.reddit.com' + link
        if link.startswith('/r/'):
            match = RGX_MATHURL.match(link)
            if match is None:
                # BUGFIX: a bare "/r/<sub>" link without "/comments/<id>"
                # previously raised AttributeError on .groups(); treat it
                # as unhandled instead, matching the final else branch.
                return None
            subreddit, post_id = match.groups()
            # Reddit's desktop gateway API returns the post as JSON,
            # including the resolved media/source URLs.
            redditlink = (
                f'https://gateway.reddit.com/desktopapi/v1/postcomments/t3_{post_id}?' +
                '&'.join([
                    'rtj=only',
                    'emotes_as_images=true',
                    'allow_over18=1',
                    'include=identity',
                    f'subredditName={subreddit}',
                    'hasSortParam=false',
                    'include_categories=true',
                    'onOtherDiscussions=false',
                ])
            )
            jo = json.loads(simpleDownloader.getUrlBytes(redditlink))
            post = jo['posts'][f't3_{post_id}']
            # Prefer the original source URL; fall back to media content.
            post_source_url = next(filter(notnone, [post.get('source')]), dict()).get('url', None)
            post_media_content = next(filter(notnone, [post.get('media')]), dict()).get('content', None)
            imgloc = next(filter(notnone, [post_source_url, post_media_content]), None)
            if imgloc is not None:
                data = get_link_bytes(imgloc)
                if data is not None:
                    dd.put(imgloc, data, filetype.guess_extension(data))
        elif matchRedditUploads(link):
            data = simpleDownloader.getUrlBytes(link)
            if data is not None:
                dd.put(link, data, filetype.guess_extension(data))
        else:
            return None
        return dd


def get_class():
    """Plugin entry point: return the downloader class."""
    return IReddIt