reddit-image-wall-getter/reddit_imgs/system/downloader/modules/imgur_com.py

158 lines
6.7 KiB
Python
Raw Normal View History

2017-12-29 22:54:22 +00:00
#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
2018-01-07 03:57:39 +00:00
import os
2017-12-29 22:54:22 +00:00
import re
import json
2018-01-07 03:57:39 +00:00
import shutil
2017-12-29 22:54:22 +00:00
import filetype
2020-01-05 03:27:19 +00:00
from pathlib import Path
2020-06-01 03:20:23 +00:00
from ._cacheable import get_link_bytes
2017-12-29 22:54:22 +00:00
from ..downloadedData import DownloadedData
from ... import simpleDownloader
2020-01-05 03:27:19 +00:00
# Optional imgur session cookie ('authautologin') used to get past the NSFW
# sign-in wall; presumably the file is provisioned manually by the operator —
# TODO confirm. Missing file => COOKIE is None and no login retry happens.
COOKIEFILE = Path('imgur.authautologin.cookie.txt')
COOKIE = COOKIEFILE.read_text().strip() if COOKIEFILE.exists() else None
# Marker text imgur embeds in album pages hidden behind its NSFW sign-in wall.
NSFW_LOGIN_WALL = "This page may contain erotic or adult imagery. You'll need to sign in if you still want to view it."
2020-05-13 21:07:05 +00:00
# Alternate wording of the NSFW sign-in wall marker (newer imgur template).
NSFW_LOGIN_WALL2 = "This post may contain erotic or adult imagery."
2020-01-05 03:27:19 +00:00
2017-12-29 22:54:22 +00:00
def works_on(domain):
    """Return True when *domain* is one of the imgur hosts this module handles."""
    supported_hosts = ('i.imgur.com', 'imgur.com', 'm.imgur.com', 'www.imgur.com')
    return domain in supported_hosts
class ImgurCom(object):
    """Downloader for imgur.com links: single images, albums and galleries."""

    def recognizes(self, link):
        # Every link routed here by works_on() is considered handled.
        return True

    def download(self, link):
        """Download *link* into a DownloadedData container.

        Direct image links are fetched as-is; anything that does not decode
        as a media file is treated as an album/gallery page whose image list
        is scraped from imgur's inline ``widgetFactory.mergeConfig`` JSON.

        Returns an empty DownloadedData when the gallery cannot be found,
        and ``None`` when an album part fails mid-download (callers appear
        to distinguish these two outcomes — keep both as-is).
        Raises ValueError when imgur's page template has changed.
        """
        dd = DownloadedData()
        simpleDownloader.cleanCookies()
        # Bypass imgur's "mature content" interstitial.
        simpleDownloader.setCookie('over18', '1')
        bts = b''
        if '/a/' not in link and '.gifv' not in link and '.webm' not in link:
            # Looks like it may be a direct image link - try fetching it raw.
            bts = get_link_bytes(link)
        elif link.endswith('.gifv'):
            # .gifv is an HTML wrapper page; the actual video is the .mp4.
            bts = get_link_bytes(link[:-4]+'mp4')
        if bts is not None:
            ext = filetype.guess_extension(bts)
            if ext is not None:
                # Got a recognizable media file: store it and fall through.
                dd.put(link, bts, ext)
            else:
                # Not a media file - assume an album/gallery HTML page.
                if '.gifv' in link or '.webm' in link:
                    bts = None
                print(' '*50, end='', flush=True)
                print('\r', end='', flush=True)
                print(' `--> It wasn\'t a single image...', end='', flush=True)
                # FIX: raw string - the pattern contains \. and \w, which are
                # invalid escape sequences in a plain string literal
                # (SyntaxWarning on modern Python); the pattern itself is
                # unchanged.
                match = re.match(
                    r"(https?)://(www\.)?(i\.|m\.|www\.)?imgur\.com/(?:(a|gallery|r|t)/)?(\w*)/?(\w*)(#[0-9]+)?(\.\w*)?",
                    link,
                    re.IGNORECASE
                )
                tp = match.group(4)  # link type: a / gallery / r / t / None
                ky = None            # album or image key (hash)
                if tp not in ('r', 't',):
                    ky = match.group(5)
                else:
                    # /r/<sub>/<key> and /t/<tag>/<key>: key is one group later.
                    ky = match.group(6)
                    if not ky:
                        ky = match.group(5)
                link2 = 'https://imgur.com/a/'+str(ky)+'/all'
                if tp is None or tp == '' or tp == 'r':
                    # Not an album URL: drop the '/a/' segment and the '/all'.
                    link2 = link2.replace('/a/', '/')[:-4]
                print('\r', end='', flush=True)
                print(' '*50, end='', flush=True)
                print('\r', end='', flush=True)
                if link2.endswith('/all') or bts is None:
                    print(' `--> Fetching album image list...', end='', flush=True)
                    bts = simpleDownloader.getUrlBytes(link2)
                else:
                    print(' `--> Album image list already fetched...', end='', flush=True)
                print('\r', end='', flush=True)
                if bts is None:
                    print(' '*50, end='', flush=True)
                    print('\r', end='', flush=True)
                    print(' `--> Gallery not found')
                    return DownloadedData()
                html = bts.decode('utf-8')
                if NSFW_LOGIN_WALL in html or NSFW_LOGIN_WALL2 in html:
                    # Hit the NSFW wall: retry once with the saved login cookie.
                    print(' '*50, end='', flush=True)
                    print(' [has logged in]', end='', flush=True)
                    print('\r', end='', flush=True)
                    print(' `--> Fetching album image list with login...', end='', flush=True)
                    simpleDownloader.setCookie('authautologin', COOKIE)
                    bts = simpleDownloader.getUrlBytes(link2)
                    simpleDownloader.delCookie('authautologin')
                    html = bts.decode('utf-8')
                    print('\r', end='', flush=True)
                if 'widgetFactory.mergeConfig' not in html:
                    raise ValueError("imgur has changed templates")
                if "('gallery', {" not in html:
                    raise ValueError("imgur has changed templates")
                # Pull the "image ... : { ... }," line out of the inline
                # gallery config block and parse its JSON payload.
                albnfo = json.loads(
                    list(
                        filter(
                            lambda f: f.startswith('image'),
                            map(
                                str.strip,
                                list(
                                    filter(
                                        lambda f: f.startswith("('gallery', {"),
                                        html.split('widgetFactory.mergeConfig')
                                    )
                                )[0]
                                .strip()
                                .splitlines()
                            )
                        )
                    )[0][6:-1].strip()[1:].strip()
                )
                imgs = [albnfo]
                if 'album_images' in albnfo:
                    imgs = albnfo['album_images']['images']
                # For albums of 10+ images, checkpoint progress into ./tmp so
                # an interrupted download of the same link can be resumed.
                intermediarySaves = len(imgs) >= 10
                if intermediarySaves:
                    if not os.path.isdir('tmp'):
                        os.makedirs('tmp')
                    if os.path.isfile('tmp/link.url'):
                        with open('tmp/link.url') as f:
                            svdlnk = f.read()
                        if svdlnk == link:
                            # Same link as the interrupted run: resume from it.
                            dd.loadfrom('tmp')
                        else:
                            shutil.rmtree('tmp')
                            os.makedirs('tmp')
                    with open('tmp/link.url', 'w') as f:
                        f.write(link)
                for seq, img in enumerate(imgs):
                    print(' '*50, end='', flush=True)
                    print('\r', end='', flush=True)
                    print(' `--> Album image #%03d of %03d'%(seq+1, len(imgs)), end='', flush=True)
                    print('\r', end='', flush=True)
                    if img['ext'] == '.gifv':
                        img['ext'] = '.mp4'
                    durl = 'http://i.imgur.com/'+img['hash']+img['ext']
                    if durl in dd.storedLinks():
                        continue  # already fetched (resumed checkpoint)
                    imb = get_link_bytes(durl)
                    if imb is None:
                        print()
                        print('Album part failed')
                        print()
                        simpleDownloader.cleanCookies()
                        # NOTE(review): returns None (not an empty
                        # DownloadedData) on partial failure; callers seem to
                        # rely on this distinction, so it is preserved.
                        return None
                    dd.put(durl, imb, img['ext'][1:])
                    if intermediarySaves and seq%10 == 0:
                        dd.into('tmp')
                print('\r', end='', flush=True)
                if os.path.isdir('tmp'):
                    shutil.rmtree('tmp')
        simpleDownloader.cleanCookies()
        print('\r', end='', flush=True)
        print(' '*79, end='', flush=True)
        print('\r', end='', flush=True)
        return dd
def get_class():
    """Return the downloader class exported by this module (plugin hook)."""
    return ImgurCom