88 lines
2.9 KiB
Python
Executable File
88 lines
2.9 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
# -*- encoding: utf-8 -*-
|
|
|
|
from .system import downloader as downloaderModule
|
|
import json
|
|
import os
|
|
|
|
# Shorthand for the factory that maps a media-source domain to a downloader
# class (returns None when no downloader exists for that domain).
downloaderGetter = downloaderModule.getDownloader

# Absolute working directory; the 'r/' (subreddit metadata) and 'i/'
# (downloaded media) trees are resolved relative to it.
wdir = os.path.abspath('.')
|
|
|
|
def isImageDirectLink(s):
    """Return True if *s* ends with a known image file extension.

    The check is case-sensitive, matching the original behavior:
    '.JPG' is NOT recognized.
    """
    # str.endswith accepts a tuple — one call instead of four chained ors.
    return s.endswith(('.jpg', '.png', '.gif', '.webp'))
|
def _collect_links():
    """Read every r/<subreddit>/subreddit.json and return all links found.

    A subreddit whose metadata file is missing, unreadable, malformed,
    or lacks a 'links' key is skipped — one bad subreddit must not abort
    the whole run.
    """
    links = []
    rdir = os.path.join(wdir, 'r')
    subreddits = sorted(
        sr for sr in os.listdir(rdir)
        if os.path.isdir(os.path.join(rdir, sr))
    )
    for subreddit in subreddits:
        srf = os.path.join(rdir, subreddit, 'subreddit.json')
        try:
            with open(srf) as f:
                links += json.load(f)['links']
        except (OSError, ValueError, KeyError):
            # Narrowed from a bare `except:` — best-effort per subreddit,
            # but no longer swallows KeyboardInterrupt/SystemExit.
            pass
    return links


def _group_by_source(links):
    """Group links by media source.

    Direct image links (recognized by extension) all go under the
    synthetic 'direct_link' key; everything else is keyed by its domain.
    """
    medias = {'direct_link': []}
    for link in links:
        if isImageDirectLink(link['link']):
            medias['direct_link'].append(link)
        else:
            medias.setdefault(link['domain'], []).append(link)
    return medias


def _download_all(medias):
    """Download every link using its source's downloader.

    Targets that already exist under i/<datakey> are skipped, as are
    links the downloader does not recognize.
    """
    for source, links in sorted(medias.items()):
        print('Changing downloader for next %d links on %s' % (len(links), source))
        downloader = downloaderGetter(source)()
        for seq, link in enumerate(links):
            print('Downloading link #%05d of %05d: %s << %s' % (
                seq + 1, len(links), link['link'], link['datakey']))
            if not downloader.recognizes(link['link']):
                continue
            target = os.path.join(wdir, 'i', link['datakey'])
            if not os.path.exists(target):
                downloader.download(link['link']).into(target)


def _print_missing_report(top_priorities, priolen, prioremain):
    """Print the table of domains that had no downloader.

    top_priorities: up to 10 (hit_count, domain) pairs, busiest first.
    priolen:        total number of domains without a downloader.
    prioremain:     total hits across the domains beyond the top 10.
    """
    print()
    print('=' * 47)
    print('| {0:^43} |'.format('Missing downloaders'))
    print('=' * 47)
    print('| {0:^30} | {1:^10} |'.format('Domain', 'Hits'))
    print('-' * 47)
    for count, domain in top_priorities:
        print('| {0:^30} | {1:^10} |'.format(domain, count))
    if priolen > 10:
        # Fixed: the original printed this row unconditionally, showing a
        # negative count ('...and more -7 domains') when priolen <= 10.
        print('|' + '.' * 32 + '|' + '.' * 12 + '|')
        print('| {0:^30} | {1:^10} |'.format(
            '...and more %d domains' % (priolen - 10), prioremain))
    print('=' * 47)
    print()


def main():
    """Collect reddit links from per-subreddit metadata, group them by
    media source, download everything a downloader exists for, and
    report the domains that lack one.
    """
    links = _collect_links()
    # Process oldest-first so downloads happen in posting order.
    links.sort(key=lambda link: link['timestamp'])

    medias = _group_by_source(links)

    # Drop sources we cannot handle, remembering how many links each had.
    priorities = []
    for source in sorted(medias):
        if downloaderGetter(source) is None:
            print('No downloader for: {0:<35} | {1:>5} links dropped'.format(
                source, len(medias[source])))
            priorities.append((len(medias[source]), source))
            del medias[source]

    # Busiest missing domains first; sort once instead of twice.
    priorities.sort(reverse=True)
    top_priorities = priorities[:10]
    prioremain = sum(count for count, _ in priorities[10:])
    priolen = len(priorities)

    _download_all(medias)

    _print_missing_report(top_priorities, priolen, prioremain)
|
|
|
|
# Script entry point: run the full collect/download/report cycle when
# executed directly (not when imported as a module).
if __name__ == '__main__':
    main()
|