reddit-image-wall-getter/reddit_imgs/system/downloader/modules/user_tumblr_com.py
2020-01-05 00:27:19 -03:00

124 lines
4.3 KiB
Python

#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
import filetype
from bs4 import BeautifulSoup as _BS
from ..downloadedData import DownloadedData
from ... import simpleDownloader
def BeautifulSoup(data):
    """Parse *data* into a soup tree, always using the ``html5lib`` parser."""
    parser_name = 'html5lib'
    return _BS(data, parser_name)
def works_on(domain):
    """Tell whether this module handles *domain*.

    A domain is handled when it ends with ``.tumblr.com``; the leading dot
    means the bare ``tumblr.com`` host itself is not matched.
    """
    handled_suffix = '.tumblr.com'
    is_tumblr_blog = domain.endswith(handled_suffix)
    return is_tumblr_blog
class UserTumblrCom(object):
    """Downloader for the images of a single post on a ``*.tumblr.com`` blog.

    The post page is fetched and parsed, the element wrapping the post is
    located with a list of known locators, and the images are then taken
    either from an embedded photoset iframe or from the ``photo-wrapper``
    elements inside the post.
    """

    # (keyword, value) pairs passed to ``find`` to locate the element that
    # wraps the post.  They are tried strictly in this order and the first
    # hit wins, so the order is part of the behavior: do not sort it.
    _POST_CONTAINER_LOCATORS = (
        ('class_', 'post'),
        ('id', 'post'),
        ('class_', 'content'),
        ('class_', 'Photo'),
        ('class_', 'photo'),
        ('class_', 'stat-photo'),
        ('id', 'posts'),
        ('id', 'base-container'),
        ('class_', 'posts'),
        ('class_', 'post-wrap'),
        ('class_', 'text-post'),
        ('class_', 'entry'),
        ('class_', 'grid'),
        ('class_', 'stat-answer'),
        ('class_', 'article-content'),
        ('id', 'content'),
        ('id', 'stuff'),
        ('class_', 'audio-post'),
        ('class_', 'video-post'),
        ('class_', 'media-post'),
        ('class_', 'post-media'),
        ('class_', 'p'),
        ('class_', 'PhotoPost'),
        ('class_', 'zoombox'),
        ('class_', 'image'),
        ('class_', 'tmblr-full'),
        ('class_', 'PhotoSet'),
        ('class_', 'bigthings'),
        ('class_', 'photoset-post'),
        ('class_', 'text_wrap'),
    )

    def recognizes(self, link):
        """Return True for every *link*; no per-link filtering is done."""
        return True

    @staticmethod
    def _progress(message):
        """Overwrite the current console line with *message*.

        Writes 50 blanks, a carriage return, the message and a final
        carriage return, all without a newline, so the next status line
        overwrites this one.
        """
        print(' ' * 50 + '\r' + message + '\r', end='')

    @classmethod
    def _find_post_container(cls, pagebs):
        """Return the first element of *pagebs* matched by a known locator.

        Returns None when none of the locators matches.
        """
        for keyword, value in cls._POST_CONTAINER_LOCATORS:
            found = pagebs.find(**{keyword: value})
            if found is not None:
                return found
        return None

    @staticmethod
    def _absolute_iframe_url(src):
        """Turn the ``src`` of a photoset iframe into an absolute URL.

        Protocol-relative values (``//host/path``) get an ``https:`` scheme,
        root-relative values (``/path``) are resolved against
        ``https://www.tumblr.com``, anything else is returned unchanged.
        """
        # Must be tested before the single-slash case: '//host/path' also
        # starts with '/', and prefixing it with the tumblr host would
        # produce a broken URL.
        if src.startswith('//'):
            return 'https:' + src
        if src.startswith('/'):
            return 'https://www.tumblr.com' + src
        return src

    def download(self, link):
        """Download every image of the post at *link*.

        Returns:
            A ``DownloadedData`` holding one entry per image.  It is empty
            when the page or the photoset listing cannot be fetched, or when
            the page contains the ``safemode_actions_display`` marker.
            ``None`` is returned when any individual image fails to download.

        Raises:
            AttributeError: when no known post container is found in the page.
        """
        dd = DownloadedData()
        self._progress(' `--> Fetching image list link')
        pagebytes = simpleDownloader.getUrlBytes(link)
        if pagebytes is None:
            return dd
        # NOTE(review): presumably the marker of Tumblr's safe-mode
        # interstitial, which carries no post content -- confirm.
        if b'safemode_actions_display' in pagebytes:
            return dd

        postbs = self._find_post_container(BeautifulSoup(pagebytes))
        if postbs is None:
            # This used to surface as an opaque "'NoneType' object has no
            # attribute 'find'".  The exception type is kept so existing
            # handlers still apply, but the message now names the cause.
            raise AttributeError(
                'no known post container found in %r' % (link,))

        iframebs = postbs.find('iframe', class_='photoset')
        if iframebs is not None:
            iframesrc = self._absolute_iframe_url(iframebs['src'])
            self._progress(' `--> Fetching image list')
            iframebytes = simpleDownloader.getUrlBytes(iframesrc)
            if iframebytes is None:
                # Same outcome as a failed page fetch: nothing to collect.
                return dd
            imagesbs = BeautifulSoup(iframebytes).find_all(
                'a', class_='photoset_photo')
        else:
            imagesbs = [
                extractimage(wrapper)
                for wrapper in postbs.find_all(class_='photo-wrapper')
            ]

        limagesbs = len(imagesbs)
        for seq, imgbs in enumerate(imagesbs, 1):
            self._progress(
                ' `--> Album image #%03d of %03d' % (seq, limagesbs))
            href = imgbs['href']
            data = simpleDownloader.getUrlBytes(href)
            if data is None:
                # One missing image invalidates the whole album.
                return None
            dd.put(href, data, filetype.guess_extension(data))
        return dd
def extractimage(el):
    """Return an object whose ``'href'`` item is the image URL for *el*.

    The first ``a`` element inside *el* is returned as-is when there is one.
    Otherwise a plain dict is built whose ``'href'`` is the ``src`` of the
    first ``img`` element inside *el*.
    """
    anchor = el.find('a')
    if anchor is not None:
        return anchor
    image = el.find('img')
    return {'href': image['src']}
def get_class():
    """Return the downloader class defined by this module."""
    downloader_class = UserTumblrCom
    return downloader_class