reddit-image-wall-getter/reddit_imgs/fetch.py

88 lines
2.9 KiB
Python
Executable File

#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
from .system import downloader as downloaderModule
import json
import os
# Shortcut to the downloader lookup of the `system.downloader` module. This
# module calls it with a source name and treats a None result as "no
# downloader available for that source".
downloaderGetter = downloaderModule.getDownloader
# Absolute path of the current working directory, captured once at import
# time; the 'r' and 'i' folders used by this module are resolved against it.
wdir = os.path.abspath('.')
def isImageDirectLink(s):
    """Return True when *s* ends with a known image file extension.

    The comparison is case-sensitive and only looks at the very end of the
    string, so 'a.JPG' or 'a.jpg?x=1' do not count as direct image links.
    """
    return s.endswith(('.jpg', '.png', '.gif', '.webp'))
def _load_links():
    """Collect the link records of every subreddit folder under ``<wdir>/r``.

    For each directory ``<wdir>/r/<name>`` the file ``subreddit.json`` is
    parsed and the value stored under its ``links`` key is appended to the
    result. Folders without that file are skipped silently; files that cannot
    be read or do not have the expected shape are reported and skipped.

    Returns:
        One list holding all loaded links, sorted by their ``timestamp`` value.
    """
    root = os.path.join(wdir, 'r')
    links = []
    for subreddit in sorted(os.listdir(root)):
        if not os.path.isdir(os.path.join(root, subreddit)):
            continue
        srf = os.path.abspath(os.path.join(root, subreddit, 'subreddit.json'))
        try:
            with open(srf) as f:
                loaded = json.load(f)['links']
        except FileNotFoundError:
            # A folder that has no subreddit.json yet simply contributes nothing.
            continue
        except (OSError, ValueError, KeyError, TypeError) as error:
            # ValueError covers invalid JSON and decoding errors; KeyError and
            # TypeError cover a document without a usable 'links' entry.
            print('Skipping {0}: {1!r}'.format(srf, error))
            continue
        links += loaded
    links.sort(key=lambda link: link['timestamp'])
    return links


def _group_by_source(links):
    """Bucket links by the source name used to look up their downloader.

    Links whose URL is a direct image link go under ``'direct_link'``; every
    other link goes under its own ``domain`` value. The ``'direct_link'``
    bucket always exists, even when it stays empty.
    """
    medias = {'direct_link': []}
    for link in links:
        if isImageDirectLink(link['link']):
            source = 'direct_link'
        else:
            source = link['domain']
        medias.setdefault(source, []).append(link)
    return medias


def _print_missing_report(priorities, shown=10):
    """Print the table of sources that had no downloader.

    Args:
        priorities: ``(hits, domain)`` tuples, one per source without downloader.
        shown: how many of the most-hit domains get their own table row.
    """
    ranked = sorted(priorities, reverse=True)
    top, rest = ranked[:shown], ranked[shown:]
    print()
    print('='*47)
    print('| {0:^43} |'.format('Missing downloaders'))
    print('='*47)
    print('| {0:^30} | {1:^10} |'.format('Domain', 'Hits'))
    print('-'*47)
    for hits, domain in top:
        print('| {0:^30} | {1:^10} |'.format(domain, hits))
    print('|'+'.'*32+'|'+'.'*12+'|')
    # len(rest) is never negative, unlike "total - shown" when fewer than
    # `shown` domains are missing a downloader.
    print('| {0:^30} | {1:^10} |'.format(
        '...and more %d domains' % len(rest),
        sum(hits for hits, _ in rest)))
    print('='*47)
    print()


def main():
    """Download every known link that has a downloader, then report the rest.

    Steps:
      1. Load the links of all subreddit folders and group them by source.
      2. Drop the sources for which ``downloaderGetter`` returns None,
         remembering how many links each of them had.
      3. For every remaining source, instantiate its downloader and fetch each
         recognized link into ``<wdir>/i/<datakey>`` unless that path exists.
      4. Print a table with the most frequent sources lacking a downloader.
    """
    medias = _group_by_source(_load_links())

    priorities = []
    # sorted() iterates over a snapshot, so deleting from `medias` here is safe.
    for source, links in sorted(medias.items()):
        if downloaderGetter(source) is None:
            print('No downloader for: {0:<35} | {1:>5} links dropped'.format(source, len(links)))
            priorities.append((len(links), source))
            del medias[source]

    for source, links in sorted(medias.items()):
        print('Changing downloader for next %d links on %s' % (len(links), source))
        downloaderClass = downloaderGetter(source)
        downloader = downloaderClass()
        for seq, link in enumerate(links, start=1):
            print('Downloading link #%05d of %05d: %s << %s' % (seq, len(links), link['link'], link['datakey']))
            if not downloader.recognizes(link['link']):
                continue
            target = os.path.join(wdir, 'i', link['datakey'])
            # An existing target is taken as "already downloaded".
            if not os.path.exists(target):
                downloader.download(link['link']).into(target)

    _print_missing_report(priorities)
if __name__ == '__main__':
    # Script entry point: run the fetch only when this file is executed
    # directly, not when it is imported as a module.
    main()