#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
"""Downloader plug-in for imgur links: direct media, albums and galleries."""

import os
import re
import json
import shutil
import filetype
from pathlib import Path
from ._cacheable import get_link_bytes
from ..downloadedData import DownloadedData
from ... import simpleDownloader

# Optional login cookie, read once at import time; None when the file is absent.
COOKIEFILE = Path('imgur.authautologin.cookie.txt')
COOKIE = COOKIEFILE.read_text().strip() if COOKIEFILE.exists() else None

# Texts whose presence in a fetched page triggers a retry with the login cookie.
NSFW_LOGIN_WALL = "This page may contain erotic or adult imagery. You'll need to sign in if you still want to view it."
NSFW_LOGIN_WALL2 = "This post may contain erotic or adult imagery."

# Message shared by every "the page no longer looks like we expect" failure.
_TEMPLATE_CHANGED = "imgur has changed templates"
_CONFIG_SEPARATOR = 'widgetFactory.mergeConfig'
_GALLERY_MARKER = "('gallery', {"

# Raw string: the pattern contains ``\.`` and ``\w``, which are invalid escape
# sequences in a regular string literal.
# Groups: 4 = link kind (a|gallery|r|t), 5 = first path word, 6 = second one.
_IMGUR_LINK_RE = re.compile(
    r"(https?)://(www\.)?(i\.|m\.|www\.)?imgur\.com/(?:(a|gallery|r|t)/)?(\w*)/?(\w*)(#[0-9]+)?(\.\w*)?",
    re.IGNORECASE
)


def works_on(domain):
    """Return True when *domain* is one of the imgur host names handled here."""
    return domain in ['i.imgur.com', 'imgur.com', 'm.imgur.com', 'www.imgur.com']


def _extract_album_info(html):
    """Return the JSON object embedded in an imgur page as a Python object.

    The page is split on ``widgetFactory.mergeConfig``; inside the first chunk
    that starts with ``('gallery', {`` the first line starting with ``image``
    is taken, its ``image ... :`` prefix and last character are dropped, and
    the remainder is parsed as JSON.

    Raises:
        ValueError: when any of the expected markers is missing (message
            "imgur has changed templates") or the payload is not valid JSON.
    """
    if _CONFIG_SEPARATOR not in html:
        raise ValueError(_TEMPLATE_CHANGED)
    if _GALLERY_MARKER not in html:
        raise ValueError(_TEMPLATE_CHANGED)
    gallery_chunk = next(
        (
            chunk
            for chunk in html.split(_CONFIG_SEPARATOR)
            if chunk.startswith(_GALLERY_MARKER)
        ),
        None
    )
    if gallery_chunk is None:
        raise ValueError(_TEMPLATE_CHANGED)
    for line in map(str.strip, gallery_chunk.strip().splitlines()):
        if line.startswith('image'):
            # [6:-1] drops the 6 leading characters and the final character;
            # the [1:] after stripping drops one more leading character
            # (presumably "image ", a trailing comma and the colon -- confirm
            # against a live page).
            return json.loads(line[6:-1].strip()[1:].strip())
    raise ValueError(_TEMPLATE_CHANGED)


class ImgurCom(object):
    """Downloader for imgur links."""

    def recognizes(self, link):
        """Accept every link; this always returns True."""
        return True

    def download(self, link):
        """Download the media behind *link*.

        A link that is neither an album (``/a/``) nor a ``.gifv``/``.webm`` is
        fetched directly; a link ending in ``.gifv`` is fetched as ``.mp4``.
        When the fetched bytes are not a recognisable file type, the link is
        treated as an album/gallery page and every listed image is downloaded.

        Returns:
            A ``DownloadedData`` holding whatever was stored (empty when the
            gallery page could not be fetched), or ``None`` when one image of
            an album failed to download.

        Raises:
            ValueError: when *link* does not match the imgur URL pattern or
                the album page does not contain the expected markers.
        """
        dd = DownloadedData()
        simpleDownloader.cleanCookies()
        simpleDownloader.setCookie('over18', '1')
        bts = b''
        if '/a/' not in link and '.gifv' not in link and '.webm' not in link:
            bts = get_link_bytes(link)
        elif link.endswith('.gifv'):
            bts = get_link_bytes(link[:-4]+'mp4')
        if bts is not None:
            ext = filetype.guess_extension(bts)
            if ext is not None:
                dd.put(link, bts, ext)
            else:
                # Not a recognisable media file: handle it as an album page.
                if '.gifv' in link or '.webm' in link:
                    bts=None
                print(' '*50,end='',flush=True)
                print('\r',end='',flush=True)
                print(' `--> It wasn\'t a single image...',end='',flush=True)
                match = _IMGUR_LINK_RE.match(link)
                if match is None:
                    # Previously this surfaced as an AttributeError on None.
                    raise ValueError('not a recognizable imgur link: %r' % (link,))
                tp = match.group(4)
                ky = None
                if tp not in ('r', 't',):
                    ky = match.group(5)
                else:
                    # For /r/ and /t/ links the second path word is used,
                    # falling back to the first one when it is empty.
                    ky = match.group(6)
                    if not ky:
                        ky = match.group(5)
                link2 = 'https://imgur.com/a/'+str(ky)+'/all'
                if tp is None or tp=='' or tp=='r':
                    # Turn ".../a/<key>/all" into ".../<key>".
                    link2=link2.replace('/a/','/')[:-4]
                print('\r',end='',flush=True)
                print(' '*50,end='',flush=True)
                print('\r',end='',flush=True)
                if link2.endswith('/all') or bts is None:
                    print(' `--> Fetching album image list...',end='',flush=True)
                    bts = simpleDownloader.getUrlBytes(link2)
                else:
                    # The bytes fetched at the top are reused as the page.
                    print(' `--> Album image list already fetched...',end='',flush=True)
                print('\r',end='',flush=True)
                if bts is None:
                    print(' '*50,end='',flush=True)
                    print('\r',end='',flush=True)
                    print(' `--> Gallery not found')
                    return DownloadedData()
                html = bts.decode('utf-8')
                if NSFW_LOGIN_WALL in html or NSFW_LOGIN_WALL2 in html:
                    print(' '*50,end='',flush=True)
                    print(' [has logged in]',end='',flush=True)
                    print('\r',end='',flush=True)
                    print(' `--> Fetching album image list with login...',end='',flush=True)
                    # NOTE(review): COOKIE is None when the cookie file is
                    # missing; confirm how setCookie treats a None value.
                    simpleDownloader.setCookie('authautologin', COOKIE)
                    bts = simpleDownloader.getUrlBytes(link2)
                    simpleDownloader.delCookie('authautologin')
                    if bts is None:
                        # Same outcome as a failed first fetch, instead of
                        # crashing on None.decode below.
                        print(' '*50,end='',flush=True)
                        print('\r',end='',flush=True)
                        print(' `--> Gallery not found')
                        return DownloadedData()
                    html = bts.decode('utf-8')
                    print('\r',end='',flush=True)
                # print(link2+' ')
                albnfo = _extract_album_info(html)
                imgs = [albnfo]
                if 'album_images' in albnfo:
                    imgs = albnfo['album_images']['images']
                # Albums of 10+ images are checkpointed into ./tmp while they
                # download; tmp/link.url records which link the folder is for.
                intermediarySaves = len(imgs)>=10
                if intermediarySaves:
                    if not os.path.isdir('tmp'):
                        os.makedirs('tmp')
                    if os.path.isfile('tmp/link.url'):
                        with open('tmp/link.url') as f:
                            svdlnk = f.read()
                        if svdlnk == link:
                            # Same link as the checkpoint: load what is there.
                            dd.loadfrom('tmp')
                        else:
                            # Checkpoint is for another link: start clean.
                            shutil.rmtree('tmp')
                            os.makedirs('tmp')
                    with open('tmp/link.url', 'w') as f:
                        f.write(link)
                for seq, img in enumerate(imgs):
                    print(' '*50,end='',flush=True)
                    print('\r',end='',flush=True)
                    print(' `--> Album image #%03d of %03d'%(seq+1,len(imgs)),end='',flush=True)
                    print('\r',end='',flush=True)
                    if img['ext'] == '.gifv':
                        img['ext'] = '.mp4'
                    durl = 'http://i.imgur.com/'+img['hash']+img['ext']
                    if durl in dd.storedLinks():
                        continue
                    imb = get_link_bytes(durl)
                    if imb is None:
                        print()
                        print('Album part failed')
                        print()
                        simpleDownloader.cleanCookies()
                        return None
                    dd.put(durl, imb, img['ext'][1:])
                    if intermediarySaves and seq%10 == 0:
                        dd.into('tmp')
                print('\r',end='',flush=True)
                if os.path.isdir('tmp'):
                    shutil.rmtree('tmp')
        simpleDownloader.cleanCookies()
        print('\r',end='',flush=True)
        print(' '*79,end='',flush=True)
        print('\r',end='',flush=True)
        return dd


def get_class():
    """Return the downloader class implemented by this module."""
    return ImgurCom