reddit-image-wall-getter/reddit_imgs/system/downloader/cache.py

#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
import hashlib
import json
import shutil
from pathlib import Path

def get_normalized_link(link: str) -> str:
    # Expand site-relative reddit links into absolute URLs.
    if link.startswith('/r/'):
        link = 'https://www.reddit.com' + link
    if link.startswith('/user/'):
        link = 'https://www.reddit.com' + link
    return link

def limit_filename_lenght(target: Path) -> Path:
    # Trim every path component to 255 characters, the usual
    # per-component limit on common filesystems.
    return Path(*[part[:255] for part in target.parts])

def get_domain(link: str) -> str:
    # parts[0] is 'i_c'; parts[1] is the host portion of the link.
    return get_path_for_caching(link).parts[1]

def get_path_for_caching(link: str) -> Path:
    link = get_normalized_link(link)
    target = Path('i_c').joinpath(link.split('://', 1)[1])
    return limit_filename_lenght(target)

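# Illustrative mapping, assuming a link that already carries a scheme:
#   get_path_for_caching('https://i.imgur.com/abcd.jpg')
#   -> Path('i_c/i.imgur.com/abcd.jpg')
# Site-relative links ('/r/...', '/user/...') are first expanded to full
# reddit.com URLs by get_normalized_link().
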
def has_file_cache(cached: Path) -> bool:
    if not cached.exists():
        return False
    metafile = cached.joinpath('_meta.json')
    if not metafile.exists():
        return False
    meta = json.loads(metafile.read_text())
    if meta['type'] != 'file':
        return False
    file = cached.joinpath(meta['disk'])
    return file.exists()

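# Shape of a single-file cache entry's _meta.json, with illustrative
# values (the writer is fix_cache_relocate_single_file_from_download;
# keys come out alphabetized because it dumps with sort_keys=True):
#   {
#     "disk": "file.jpg",
#     "ext": "jpg",
#     "link": "i.imgur.com/abcd.jpg",
#     "type": "file"
#   }
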
def read_file_from_cache(cached: Path) -> bytes:
    if not cached.exists():
        raise ValueError("Cannot read from non-existing cache: %r" % cached)
    metafile = cached.joinpath('_meta.json')
    if not metafile.exists():
        raise ValueError("Cannot read from broken cache: %r" % metafile)
    meta = json.loads(metafile.read_text())
    if meta['type'] != 'file':
        raise ValueError("Cannot read a gallery as a single file: %r" % cached)
    file = cached.joinpath(meta['disk'])
    if not file.exists():
        raise ValueError("Cannot locate missing file: %r" % file)
    return file.read_bytes()

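# Typical read path, as a sketch:
#   cached = get_path_for_caching(link)
#   if has_file_cache(cached):
#       data = read_file_from_cache(cached)
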
def uncache_download(link: str) -> None:
    # Drop a link's cache entry entirely.
    target = get_path_for_caching(link)
    if target.exists():
        shutil.rmtree(target)

def replicate_from_cache(download_target, link) -> bool:
    # Rebuild a download directory from the shared 'i_c' cache tree by
    # pointing numbered symlinks at the cached files.
    link = get_normalized_link(link)
    download_path = Path(download_target)
    target = get_path_for_caching(link)
    target_meta = target.joinpath('_meta.json')
    if not target_meta.exists():
        return False
    target_metajson = json.loads(target_meta.read_text())
    if target_metajson['type'] == 'file':
        download_path.mkdir(parents=True, exist_ok=True)
        ext = target_metajson['ext']
        ffl = f"0000.{ext}"
        target_file = target.joinpath(target_metajson['disk'])
        sl = download_path.joinpath(ffl)
        if sl.exists():
            sl.unlink()
        # Relative symlink; assumes download_path sits two levels below
        # the directory that holds 'i_c'.
        sl.symlink_to(f'../../{target_file}')
        download_path.joinpath('meta.json').write_text(json.dumps([{
            'dname': ffl,
            'ext': ext,
            'link': link,
        }], sort_keys=True, indent=2))
    elif target_metajson['type'] == 'album':
        download_path.mkdir(parents=True, exist_ok=True)
        files = list()
        for i, lnk in enumerate(target_metajson['link']):
            fltarget = get_path_for_caching(lnk)
            fltarget_meta = fltarget.joinpath('_meta.json')
            fltarget_metajson = json.loads(fltarget_meta.read_text())
            ext = fltarget_metajson['ext']
            ffl = '%04d.%s' % (i, ext)
            fltarget_file = fltarget.joinpath(fltarget_metajson['disk'])
            sl = download_path.joinpath(ffl)
            if sl.exists():
                sl.unlink()
            sl.symlink_to(f'../../{fltarget_file}')
            files.append({
                'dname': ffl,
                'ext': ext,
                'link': lnk,
            })
        download_path.joinpath('meta.json').write_text(
            json.dumps(files, sort_keys=True, indent=2))
    else:
        raise ValueError("type field cannot be %r" % target_metajson['type'])
    return True

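# After a successful replicate_from_cache('i/example', link), the download
# directory holds numbered symlinks into the cache plus a meta.json index;
# 'i/example' is an illustrative path two levels below the project root,
# matching the '../../' prefix used above:
#   i/example/0000.jpg -> ../../i_c/i.imgur.com/abcd.jpg/file.jpg
#   i/example/meta.json
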
def fix_cache(download_target, link) -> list:
    # Move freshly downloaded files into the 'i_c' cache tree, leaving
    # symlinks behind in the download directory.
    link = get_normalized_link(link)
    download_path = Path(download_target)
    download_meta = download_path.joinpath('meta.json')
    downloads = json.loads(download_meta.read_text())
    target = get_path_for_caching(link)
    target.mkdir(parents=True, exist_ok=True)
    protocolless_link = link.split('://', 1)[1]
    if len(downloads) == 1 and downloads[0]['link'].split('://', 1)[1] == protocolless_link:
        # A single file whose link matches the page link: cache as 'file'.
        return [fix_cache_relocate_single_file_from_download(
            download_path, downloads[0], target)]
    else:
        # Otherwise cache as an 'album' whose entries each get their own
        # single-file cache entry.
        target_meta = target.joinpath('_meta.json')
        if not target_meta.exists():
            target_meta.write_text(json.dumps({
                'type': 'album',
                'link': [download['link'] for download in downloads],
            }, sort_keys=True, indent=2))
        return [
            fix_cache_relocate_single_file_from_download(
                download_path,
                download,
                Path('i_c').joinpath(download['link'].split('://', 1)[1])
            )
            for download in downloads
        ]

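# Typical lifecycle, as a sketch: a downloader writes files plus meta.json
# into a download directory, fix_cache() then moves the bytes into 'i_c'
# and leaves symlinks behind, and replicate_from_cache() can later rebuild
# an equivalent directory from the cache alone ('i/example' and 'i/other'
# are hypothetical download directories):
#   fix_cache('i/example', link)            # populate cache from download
#   replicate_from_cache('i/other', link)   # rebuild elsewhere from cache
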
def fix_cache_relocate_single_file_from_download(download_path, download, target):
    thumbs_dict = dict()
    target = limit_filename_lenght(target)
    target_meta = target.joinpath('_meta.json')
    ext = download['ext']
    target_file = target.joinpath(f"file.{ext}")
    target_hashfile = target.joinpath(f"file.{ext}.sha256")
    downloaded_file = download_path.joinpath(download['dname'])
    if not downloaded_file.is_symlink():
        # Real file on disk: copy its bytes into the cache, then replace
        # the download with a relative symlink into the cache.
        target_meta.parent.mkdir(parents=True, exist_ok=True)
        target_meta.write_text(json.dumps({
            'type': 'file',
            'link': download['link'].split('://', 1)[1],
            'ext': ext,
            'disk': target_file.name,
        }, sort_keys=True, indent=2))
        target_file.write_bytes(downloaded_file.read_bytes())
        if target_hashfile.exists():
            target_hashfile.unlink()
        downloaded_file.unlink()
        downloaded_file.symlink_to(f'../../{target_file}')
    if not target_file.exists():
        shutil.rmtree(target)  # cache is invalid; remove it
        for fl in download_path.glob('*'):
            if fl.is_symlink():  # download has a broken symlink into cache
                shutil.rmtree(download_path)
                break
        raise Exception("Specified cached file does not exist.\n" +
                        f"Download path: {download_path!r}\n" +
                        f"Target: {target!r}")
    if not target_hashfile.exists():
        # Record a SHA-256 digest of the cached bytes alongside the file.
        m = hashlib.sha256()
        m.update(target_file.read_bytes())
        target_hashfile.write_text(m.hexdigest())
    thumbs_dict['file'] = str(target_file)
    return thumbs_dict

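if __name__ == '__main__':
    # Minimal read-only smoke test; the link is hypothetical and the
    # 'i_c' tree is assumed to sit under the current working directory.
    demo_link = 'https://i.imgur.com/abcd.jpg'
    cached = get_path_for_caching(demo_link)
    if has_file_cache(cached):
        print('%d bytes cached for %s'
              % (len(read_file_from_cache(cached)), demo_link))
    else:
        print('no cache entry for %s' % demo_link)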