reddit-image-wall-getter/reddit_imgs/system/downloader/modules/user_tumblr_com.py

#!/usr/bin/env python3
# -*- encoding: utf-8 -*-

import filetype
from bs4 import BeautifulSoup as _BS
from ..downloadedData import DownloadedData
from ... import simpleDownloader

def BeautifulSoup(data):
    """Parse ``data`` with bs4, always selecting the ``html5lib`` parser."""
    parser_name = 'html5lib'
    return _BS(data, parser_name)

def works_on(domain):
    """Tell whether ``domain`` is a sub-domain of ``tumblr.com``.

    Only names ending in ``.tumblr.com`` match, so the bare ``tumblr.com``
    host is not accepted.
    """
    tumblr_suffix = '.tumblr.com'
    return domain.endswith(tumblr_suffix)

class UserTumblrCom(object):
    """Downloader for image posts hosted on ``*.tumblr.com`` pages."""

    # Keyword arguments for ``soup.find(...)``, tried strictly in this order
    # until one yields an element. Themes differ widely, hence the long list;
    # the order is significant because earlier entries win.
    _POST_SELECTORS = (
        {'class_': 'post'},
        {'id': 'post'},
        {'class_': 'content'},
        {'class_': 'Photo'},
        {'class_': 'photo'},
        {'class_': 'stat-photo'},
        {'id': 'posts'},
        {'id': 'base-container'},
        {'class_': 'posts'},
        {'class_': 'post-wrap'},
        {'class_': 'text-post'},
        {'class_': 'entry'},
        {'class_': 'grid'},
        {'class_': 'stat-answer'},
        {'class_': 'article-content'},
        {'id': 'content'},
        {'id': 'stuff'},
        {'class_': 'audio-post'},
        {'class_': 'video-post'},
        {'class_': 'media-post'},
        {'class_': 'post-media'},
        {'class_': 'p'},
        {'class_': 'PhotoPost'},
        {'class_': 'zoombox'},
        {'class_': 'image'},
        {'class_': 'tmblr-full'},
        {'class_': 'PhotoSet'},
        {'class_': 'bigthings'},
        {'class_': 'photoset-post'},
        {'class_': 'text_wrap'},
    )

    def recognizes(self, link):
        """Return True unconditionally; every link is accepted."""
        return True

    @staticmethod
    def _status(message):
        """Overwrite the current terminal line with ``message``."""
        # Blank the line first, then return the cursor to column 0 both
        # before and after the message so the next status replaces this one.
        print(' '*50, end='')
        print('\r', end='')
        print(message, end='')
        print('\r', end='')

    @classmethod
    def _find_post_container(cls, pagebs):
        """Return the first element matching ``_POST_SELECTORS``, or None."""
        for selector in cls._POST_SELECTORS:
            found = pagebs.find(**selector)
            if found is not None:
                return found
        return None

    def download(self, link):
        """Fetch the images of the post at ``link``.

        Returns a ``DownloadedData`` with one entry per downloaded image.
        The object is returned empty when the page cannot be fetched, when
        the page contains the ``safemode_actions_display`` marker, when no
        known post container is found, or when the photoset listing cannot
        be fetched. Returns ``None`` if fetching any individual image fails.
        """
        dd = DownloadedData()
        self._status(' `--> Fetching image list link')
        pagebytes = simpleDownloader.getUrlBytes(link)
        if pagebytes is None:
            return dd
        # NOTE(review): presumably this marker identifies a safe-mode
        # interstitial page with no usable content - confirm.
        if b'safemode_actions_display' in pagebytes:
            return dd
        pagebs = BeautifulSoup(pagebytes)
        postbs = self._find_post_container(pagebs)
        if postbs is None:
            # Unknown page layout: nothing to extract, so report an empty
            # result instead of failing on ``None.find``.
            return dd
        iframebs = postbs.find('iframe', class_='photoset')
        if iframebs is not None:
            iframesrc = iframebs['src']
            if iframesrc.startswith('/'):
                # Site-relative iframe source; resolve against tumblr.com.
                iframesrc = 'https://www.tumblr.com'+iframesrc
            self._status(' `--> Fetching image list')
            iframebytes = simpleDownloader.getUrlBytes(iframesrc)
            if iframebytes is None:
                # Same outcome as a failed fetch of the main page above.
                return dd
            iframebs = BeautifulSoup(iframebytes)
            imagesbs = iframebs.find_all('a', class_='photoset_photo')
        else:
            imagesbs = [
                extractimage(wrapper)
                for wrapper in postbs.find_all(class_='photo-wrapper')
            ]
        limagesbs = len(imagesbs)
        for seq, imgbs in enumerate(imagesbs):
            self._status(' `--> Album image #%03d of %03d' % (seq+1, limagesbs))
            imageurl = imgbs['href']
            data = simpleDownloader.getUrlBytes(imageurl)
            if data is None:
                # A single failed image aborts the whole download.
                return None
            dd.put(imageurl, data, filetype.guess_extension(data))
        return dd

def extractimage(el):
    """Return something with an ``'href'`` entry for the image inside ``el``.

    The first ``a`` element found in ``el`` is returned as-is when there is
    one; otherwise a plain dict is built whose ``'href'`` is the ``src`` of
    the first ``img`` element.
    """
    anchor = el.find('a')
    if anchor is not None:
        return anchor
    image = el.find('img')
    return {'href': image['src']}

def get_class():
    """Return the downloader class defined by this module."""
    downloader_class = UserTumblrCom
    return downloader_class