#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
"""Download the media referenced by the per-subreddit link lists.

Reads ``r/<subreddit>/subreddit.json`` for every subreddit directory under
the working directory, groups the listed links by source, and stores each
link that has a matching downloader under ``i/<datakey>``. A table of the
sources that had no downloader is printed at the end.
"""

import json
import os

from .system import downloader as downloaderModule

downloaderGetter = downloaderModule.getDownloader

# Working directory captured at import time; 'r' is read from and 'i' is
# written to relative to it.
wdir = os.path.abspath('.')

# URL suffixes treated as pointing straight at an image file.
_IMAGE_SUFFIXES = ('.jpg', '.png', '.gif', '.webp')

# Number of missing-downloader sources that get their own row in the report.
_REPORT_ROWS = 10


def isImageDirectLink(s):
    """Return True when *s* ends with one of the known image suffixes."""
    return s.endswith(_IMAGE_SUFFIXES)


def _load_links():
    """Return the ``links`` entries of every subreddit, sorted by timestamp.

    Subreddits are the directories found in ``<wdir>/r``. A subreddit whose
    ``subreddit.json`` is missing, unreadable, not valid JSON, or lacks a
    ``links`` key contributes nothing.
    """
    root = os.path.join(wdir, 'r')
    links = []
    for subreddit in sorted(os.listdir(root)):
        if not os.path.isdir(os.path.join(root, subreddit)):
            continue
        srf = os.path.abspath(os.path.join(root, subreddit, 'subreddit.json'))
        try:
            with open(srf, encoding='utf-8') as f:
                found = json.load(f)['links']
        except (OSError, ValueError, KeyError, TypeError):
            # Best effort: skip this subreddit, but let KeyboardInterrupt
            # and SystemExit propagate (a bare except would swallow them).
            continue
        links += found
    links.sort(key=lambda link: link['timestamp'])
    return links


def _group_by_source(links):
    """Group *links* by source.

    Links whose URL is a direct image link go under ``'direct_link'``;
    every other link goes under its ``'domain'`` value. The
    ``'direct_link'`` group is always present, even when empty.
    """
    medias = {'direct_link': []}
    for link in links:
        if isImageDirectLink(link['link']):
            source = 'direct_link'
        else:
            source = link['domain']
        medias.setdefault(source, []).append(link)
    return medias


def _split_by_downloader(medias):
    """Separate sources that have a downloader from those that do not.

    Returns ``(supported, missing)``: *supported* is a list of
    ``(source, downloaderClass, links)`` and *missing* is a list of
    ``(link_count, source)``. A line is printed for each dropped source.
    """
    supported = []
    missing = []
    for source, links in sorted(medias.items()):
        downloaderClass = downloaderGetter(source)
        if downloaderClass is None:
            print('No downloader for: {0:<35} | {1:>5} links dropped'.format(source, len(links)))
            missing.append((len(links), source))
        else:
            supported.append((source, downloaderClass, links))
    return supported, missing


def _download_all(supported):
    """Download every recognized link that is not already present in ``i/``."""
    for source, downloaderClass, links in supported:
        total = len(links)
        print('Changing downloader for next %d links on %s' % (total, source))
        downloader = downloaderClass()
        for seq, link in enumerate(links, start=1):
            print('Downloading link #%05d of %05d: %s << %s' % (seq, total, link['link'], link['datakey']))
            if not downloader.recognizes(link['link']):
                continue
            target = os.path.join(wdir, 'i', link['datakey'])
            # An existing target means the link was fetched on an earlier run.
            if not os.path.exists(target):
                downloader.download(link['link']).into(target)


def _print_missing_report(missing):
    """Print a table of the sources without a downloader, most hits first.

    The first ``_REPORT_ROWS`` sources get one row each; the rest are
    folded into a single summary row (zero domains / zero hits when there
    are no more).
    """
    ranked = sorted(missing, reverse=True)
    top = ranked[:_REPORT_ROWS]
    rest = ranked[_REPORT_ROWS:]
    print()
    print('=' * 47)
    print('| {0:^43} |'.format('Missing downloaders'))
    print('=' * 47)
    print('| {0:^30} | {1:^10} |'.format('Domain', 'Hits'))
    print('-' * 47)
    for hits, domain in top:
        print('| {0:^30} | {1:^10} |'.format(domain, hits))
    print('|' + '.' * 32 + '|' + '.' * 12 + '|')
    print('| {0:^30} | {1:^10} |'.format('...and more %d domains' % len(rest), sum(hits for hits, _ in rest)))
    print('=' * 47)
    print()


def main():
    """Load all links, download the supported ones, and report the rest."""
    medias = _group_by_source(_load_links())
    supported, missing = _split_by_downloader(medias)
    _download_all(supported)
    _print_missing_report(missing)


if __name__ == '__main__':
    main()