reddit-image-wall-getter/reddit_imgs/system/downloader/modules/imgur_com.py

158 lines
6.7 KiB
Python
Raw Normal View History

2017-12-29 22:54:22 +00:00
#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
2018-01-07 03:57:39 +00:00
import os
2017-12-29 22:54:22 +00:00
import re
import json
2018-01-07 03:57:39 +00:00
import shutil
2017-12-29 22:54:22 +00:00
import filetype
2020-01-05 03:27:19 +00:00
from pathlib import Path
2020-06-01 03:20:23 +00:00
from ._cacheable import get_link_bytes
2017-12-29 22:54:22 +00:00
from ..downloadedData import DownloadedData
from ... import simpleDownloader
2020-01-05 03:27:19 +00:00
# Optional imgur session cookie ('authautologin') used to get past the NSFW
# sign-in wall; presumably the file is provisioned manually by the operator —
# TODO confirm. Missing file => COOKIE is None and no login retry happens.
COOKIEFILE = Path('imgur.authautologin.cookie.txt')
COOKIE = COOKIEFILE.read_text().strip() if COOKIEFILE.exists() else None
# Marker text imgur embeds in album pages hidden behind its NSFW sign-in wall.
NSFW_LOGIN_WALL = "This page may contain erotic or adult imagery. You'll need to sign in if you still want to view it."
2020-05-13 21:07:05 +00:00
# Alternate wording of the NSFW sign-in wall marker (newer imgur template).
NSFW_LOGIN_WALL2 = "This post may contain erotic or adult imagery."
2020-01-05 03:27:19 +00:00
2017-12-29 22:54:22 +00:00
def works_on(domain):
    """Return True when *domain* is one of the imgur hosts this module handles."""
    supported_hosts = ('i.imgur.com', 'imgur.com', 'm.imgur.com', 'www.imgur.com')
    return domain in supported_hosts
class ImgurCom(object):
    """Downloader for imgur.com links: single images, albums and galleries."""

    def recognizes(self, link):
        # Every link routed here by works_on() is considered handled.
        return True

    def download(self, link):
        """Download *link* into a DownloadedData container.

        Direct image links are fetched as-is; anything that does not decode
        as a media file is treated as an album/gallery page whose image list
        is scraped from imgur's inline ``widgetFactory.mergeConfig`` JSON.

        Returns an empty DownloadedData when the gallery cannot be found,
        and ``None`` when an album part fails mid-download (callers appear
        to distinguish these two outcomes — keep both as-is).
        Raises ValueError when imgur's page template has changed.
        """
        dd = DownloadedData()
        simpleDownloader.cleanCookies()
        # Bypass imgur's "mature content" interstitial.
        simpleDownloader.setCookie('over18', '1')
        bts = b''
        if '/a/' not in link and '.gifv' not in link and '.webm' not in link:
            # Looks like it may be a direct image link - try fetching it raw.
            bts = get_link_bytes(link)
        elif link.endswith('.gifv'):
            # .gifv is an HTML wrapper page; the actual video is the .mp4.
            bts = get_link_bytes(link[:-4]+'mp4')
        if bts is not None:
            ext = filetype.guess_extension(bts)
            if ext is not None:
                # Got a recognizable media file: store it and fall through.
                dd.put(link, bts, ext)
            else:
                # Not a media file - assume an album/gallery HTML page.
                if '.gifv' in link or '.webm' in link:
                    bts = None
                print(' '*50, end='', flush=True)
                print('\r', end='', flush=True)
                print(' `--> It wasn\'t a single image...', end='', flush=True)
                # FIX: raw string - the pattern contains \. and \w, which are
                # invalid escape sequences in a plain string literal
                # (SyntaxWarning on modern Python); the pattern itself is
                # unchanged.
                match = re.match(
                    r"(https?)://(www\.)?(i\.|m\.|www\.)?imgur\.com/(?:(a|gallery|r|t)/)?(\w*)/?(\w*)(#[0-9]+)?(\.\w*)?",
                    link,
                    re.IGNORECASE
                )
                tp = match.group(4)  # link type: a / gallery / r / t / None
                ky = None            # album or image key (hash)
                if tp not in ('r', 't',):
                    ky = match.group(5)
                else:
                    # /r/<sub>/<key> and /t/<tag>/<key>: key is one group later.
                    ky = match.group(6)
                    if not ky:
                        ky = match.group(5)
                link2 = 'https://imgur.com/a/'+str(ky)+'/all'
                if tp is None or tp == '' or tp == 'r':
                    # Not an album URL: drop the '/a/' segment and the '/all'.
                    link2 = link2.replace('/a/', '/')[:-4]
                print('\r', end='', flush=True)
                print(' '*50, end='', flush=True)
                print('\r', end='', flush=True)
                if link2.endswith('/all') or bts is None:
                    print(' `--> Fetching album image list...', end='', flush=True)
                    bts = simpleDownloader.getUrlBytes(link2)
                else:
                    print(' `--> Album image list already fetched...', end='', flush=True)
                print('\r', end='', flush=True)
                if bts is None:
                    print(' '*50, end='', flush=True)
                    print('\r', end='', flush=True)
                    print(' `--> Gallery not found')
                    return DownloadedData()
                html = bts.decode('utf-8')
                if NSFW_LOGIN_WALL in html or NSFW_LOGIN_WALL2 in html:
                    # Hit the NSFW wall: retry once with the saved login cookie.
                    print(' '*50, end='', flush=True)
                    print(' [has logged in]', end='', flush=True)
                    print('\r', end='', flush=True)
                    print(' `--> Fetching album image list with login...', end='', flush=True)
                    simpleDownloader.setCookie('authautologin', COOKIE)
                    bts = simpleDownloader.getUrlBytes(link2)
                    simpleDownloader.delCookie('authautologin')
                    html = bts.decode('utf-8')
                    print('\r', end='', flush=True)
                if 'widgetFactory.mergeConfig' not in html:
                    raise ValueError("imgur has changed templates")
                if "('gallery', {" not in html:
                    raise ValueError("imgur has changed templates")
                # Pull the "image ... : { ... }," line out of the inline
                # gallery config block and parse its JSON payload.
                albnfo = json.loads(
                    list(
                        filter(
                            lambda f: f.startswith('image'),
                            map(
                                str.strip,
                                list(
                                    filter(
                                        lambda f: f.startswith("('gallery', {"),
                                        html.split('widgetFactory.mergeConfig')
                                    )
                                )[0]
                                .strip()
                                .splitlines()
                            )
                        )
                    )[0][6:-1].strip()[1:].strip()
                )
                imgs = [albnfo]
                if 'album_images' in albnfo:
                    imgs = albnfo['album_images']['images']
                # For albums of 10+ images, checkpoint progress into ./tmp so
                # an interrupted download of the same link can be resumed.
                intermediarySaves = len(imgs) >= 10
                if intermediarySaves:
                    if not os.path.isdir('tmp'):
                        os.makedirs('tmp')
                    if os.path.isfile('tmp/link.url'):
                        with open('tmp/link.url') as f:
                            svdlnk = f.read()
                        if svdlnk == link:
                            # Same link as the interrupted run: resume from it.
                            dd.loadfrom('tmp')
                        else:
                            shutil.rmtree('tmp')
                            os.makedirs('tmp')
                    with open('tmp/link.url', 'w') as f:
                        f.write(link)
                for seq, img in enumerate(imgs):
                    print(' '*50, end='', flush=True)
                    print('\r', end='', flush=True)
                    print(' `--> Album image #%03d of %03d'%(seq+1, len(imgs)), end='', flush=True)
                    print('\r', end='', flush=True)
                    if img['ext'] == '.gifv':
                        img['ext'] = '.mp4'
                    durl = 'http://i.imgur.com/'+img['hash']+img['ext']
                    if durl in dd.storedLinks():
                        continue  # already fetched (resumed checkpoint)
                    imb = get_link_bytes(durl)
                    if imb is None:
                        print()
                        print('Album part failed')
                        print()
                        simpleDownloader.cleanCookies()
                        # NOTE(review): returns None (not an empty
                        # DownloadedData) on partial failure; callers seem to
                        # rely on this distinction, so it is preserved.
                        return None
                    dd.put(durl, imb, img['ext'][1:])
                    if intermediarySaves and seq%10 == 0:
                        dd.into('tmp')
                print('\r', end='', flush=True)
                if os.path.isdir('tmp'):
                    shutil.rmtree('tmp')
        simpleDownloader.cleanCookies()
        print('\r', end='', flush=True)
        print(' '*79, end='', flush=True)
        print('\r', end='', flush=True)
        return dd
def get_class():
    """Return the downloader class exported by this module (plugin hook)."""
    return ImgurCom