reddit-image-wall-getter/reddit_imgs/system/downloader/modules/_user_tumblr_com.py

167 lines
6.6 KiB
Python

#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
import filetype
from bs4 import BeautifulSoup as _BS
from ..downloadedData import DownloadedData
from ... import simpleDownloader
def BeautifulSoup(data):
    """Parse ``data`` with bs4, always using the ``html5lib`` parser.

    Shadows the bs4 constructor name inside this module so every call site
    gets the same parser without having to spell it out.
    """
    parser_name = 'html5lib'
    return _BS(data, parser_name)
def works_on(domain):
    """Tell whether this module handles ``domain``.

    Returns True only when ``domain`` ends with ``.tumblr.com``, so the bare
    ``tumblr.com`` host is not matched.
    """
    tumblr_suffix = '.tumblr.com'
    return domain.endswith(tumblr_suffix)
class UserTumblrCom(object):
    """Downloader for posts hosted on ``*.tumblr.com`` blogs.

    Tumblr themes are user-defined, so there is no single markup to rely on.
    The class therefore probes a long, ordered list of element ids/classes
    that themes commonly use for the post container, and then a second list
    of classes commonly used for the image wrappers inside it.
    """

    # Ordered ``(find() keyword, value)`` pairs used to locate the element
    # holding the post. The first lookup that matches wins, so the order of
    # this table is significant.
    _POST_CONTAINER_LOOKUPS = (
        ('class_', 'entries'),
        ('id', 'post'),
        ('id', 'Post'),
        ('class_', 'content'),
        ('class_', 'Photo'),
        ('class_', 'photo'),
        ('class_', 'stat-photo'),
        ('id', 'posts'),
        ('id', 'base-container'),
        ('class_', 'posts'),
        ('class_', 'post-wrap'),
        ('class_', 'text-post'),
        ('class_', 'entry'),
        ('class_', 'grid'),
        ('class_', 'stat-answer'),
        ('class_', 'article-content'),
        ('id', 'content'),
        ('id', 'stuff'),
        ('class_', 'audio-post'),
        ('class_', 'video-post'),
        ('class_', 'media-post'),
        ('class_', 'post-media'),
        ('class_', 'p'),
        ('class_', 'PhotoPost'),
        ('class_', 'tmblr-full'),
        ('class_', 'PhotoSet'),
        ('class_', 'bigthings'),
        ('class_', 'photoset-post'),
        ('class_', 'text_wrap'),
        ('class_', 'post-photo'),
        ('class_', 'photo-post-photo'),
        ('class_', 'photo-post'),
        ('class_', 'wholeimage'),
        ('class_', 'image'),
        ('class_', 'zoombox'),
    )

    # Classes of elements that may wrap an image inside the post container.
    # ``<figure>`` elements are additionally scanned by tag name.
    _IMAGE_WRAPPER_CLASSES = (
        'photo-wrapper',
        'PhotoWrapper',
        'ThePhoto',
        'photo',
        'photo-data',
        'post',
        'box',
        'media',
        'full-image',
        'full-media',
        'image',
        'imgback',
    )

    @staticmethod
    def _progress(message):
        """Overwrite the current terminal line with ``message``."""
        # Blank the line, return the cursor, write the message and return the
        # cursor again so that the next status message overwrites this one.
        print(' ' * 50 + '\r' + message + '\r', end='')

    @classmethod
    def _find_post_container(cls, pagebs):
        """Return the first element matching the lookup table, or None."""
        for attribute, value in cls._POST_CONTAINER_LOOKUPS:
            found = pagebs.find(**{attribute: value})
            if found is not None:
                return found
        return None

    def recognizes(self, link):
        """Accept every link; domain filtering is done by ``works_on``."""
        return True

    def download(self, link):
        """Fetch every image of the Tumblr post at ``link``.

        Returns a ``DownloadedData`` with one ``put()`` entry per distinct
        image URL, in the order the images appear in the page. The returned
        object is left empty when the page cannot be fetched, when it is a
        safe-mode interstitial, when no known post container is found, or
        when the photoset listing cannot be fetched.

        Returns ``None`` when an individual image download fails; this
        all-or-nothing result is kept from the original implementation, so
        callers must be prepared for it.
        """
        dd = DownloadedData()
        self._progress(' `--> Fetching image list link')
        pagebytes = simpleDownloader.getUrlBytes(link)
        if pagebytes is None:
            return dd
        if b'safemode_actions_display' in pagebytes:
            return dd
        pagebs = BeautifulSoup(pagebytes)
        postbs = self._find_post_container(pagebs)
        if postbs is None:
            # Unknown theme: nothing to extract (this used to crash with
            # AttributeError on ``None.find``).
            return dd

        iframebs = postbs.find('iframe', class_='photoset')
        # An iframe without ``src`` is useless; fall back to inline scanning.
        iframesrc = None if iframebs is None else iframebs.get('src')
        if iframesrc is not None:
            if iframesrc.startswith('/'):
                iframesrc = 'https://www.tumblr.com' + iframesrc
            self._progress(' `--> Fetching image list')
            iframebytes = simpleDownloader.getUrlBytes(iframesrc)
            if iframebytes is None:
                # Same policy as a failed page fetch above.
                return dd
            photosetbs = BeautifulSoup(iframebytes)
            candidates = photosetbs.find_all('a', class_='photoset_photo')
        else:
            candidates = []
            for wrapper_class in self._IMAGE_WRAPPER_CLASSES:
                candidates.extend(
                    extractimage(el)
                    for el in postbs.find_all(class_=wrapper_class)
                )
            candidates.extend(
                extractimage(el) for el in postbs.find_all('figure')
            )

        # De-duplicate while keeping first-seen order (a plain ``set`` would
        # shuffle the album); ``extractimage`` yields None for non-images.
        hrefs = list(dict.fromkeys(
            found['href'] for found in candidates if found is not None
        ))
        total = len(hrefs)
        for seq, href in enumerate(hrefs):
            self._progress(
                ' `--> Album image #%03d of %03d' % (seq + 1, total)
            )
            data = simpleDownloader.getUrlBytes(href)
            if data is None:
                return None
            dd.put(href, data, filetype.guess_extension(data))
        return dd
_IMAGE_EXTENSIONS = ('.jpg', '.png', '.gif', '.bmp')


def _has_image_extension(href):
    """Return True when ``href`` contains one of the known image extensions."""
    return any(extension in href for extension in _IMAGE_EXTENSIONS)


def extractimage(el):
    """Pick the image link carried by ``el``, or None when there is none.

    ``el`` is an element exposing ``find(tag)`` and ``name``; the anchor and
    image it contains (or is) must support ``get``/``[]`` attribute access.

    Selection rules:
      * the first ``<a>`` (or ``el`` itself when it is an ``<a>``) is the
        candidate, provided it has an ``href``;
      * when there is no usable anchor, or the anchor already points at an
        image, the ``<img>`` source is preferred: ``src`` first, otherwise
        the first URL of ``srcset``;
      * whatever was selected is discarded unless its ``href`` contains an
        image extension, so anchors pointing to ordinary pages yield None.

    Returns the anchor element itself or a ``{'href': url}`` dict; both can
    be indexed with ``['href']``.
    """
    anchor = el.find('a')
    if anchor is None and el.name == 'a':
        anchor = el
    if anchor is not None and anchor.get('href') is None:
        # An anchor without href carries no link (indexing it used to raise
        # KeyError); treat it as absent.
        anchor = None

    img = el.find('img')
    if img is None and el.name == 'img':
        img = el

    link = anchor
    if img is not None and (
            anchor is None or _has_image_extension(anchor['href'])):
        src = img.get('src')
        if src is None:
            srcset = img.get('srcset')
            if srcset is not None:
                # First candidate of the srcset, stripped of its descriptor.
                src = srcset.split(' ')[0].split(',')[0]
        if src is not None:
            link = {'href': src}

    if link is not None and not _has_image_extension(link['href']):
        link = None
    return link
def get_class():
    """Return the downloader class implemented by this module."""
    downloader_cls = UserTumblrCom
    return downloader_cls