#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Helpers for a URL-keyed download cache stored under ``i_c/``.

Each cached link maps to a directory ``i_c/<host>/<path...>`` holding a
``_meta.json`` descriptor plus, for single files, the payload itself.
"""

from pathlib import Path
import json
import hashlib
import shutil


def get_normalized_link(link: str) -> str:
    """Expand Reddit-relative links into absolute URLs."""
    if link.startswith(('/r/', '/user/')):
        link = 'https://www.reddit.com' + link
    return link


def limit_filename_lenght(target: Path) -> Path:
    """Clamp every path component to 255 characters, the usual filesystem limit."""
    return Path(*[part[:255] for part in target.parts])


def get_domain(link: str) -> str:
    """Return the host component of ``link``."""
    return get_path_for_caching(link).parts[1]


def get_path_for_caching(link: str) -> Path:
    """Map a link to its cache directory: i_c/<everything after the protocol>."""
    link = get_normalized_link(link)
    target = Path('i_c').joinpath(link.split('://', 1)[1])
    return limit_filename_lenght(target)


def has_file_cache(cached: Path) -> bool:
    """Return True if ``cached`` holds a complete single-file cache entry."""
    if not cached.exists():
        return False
    metafile = cached.joinpath('_meta.json')
    if not metafile.exists():
        return False
    meta = json.loads(metafile.read_text())
    if meta['type'] != 'file':
        return False
    return cached.joinpath(meta['disk']).exists()


def read_file_from_cache(cached: Path) -> bytes:
    """Return the cached bytes for a single-file entry, or raise ValueError."""
    if not cached.exists():
        raise ValueError("Cannot read from non-existing cache: %r" % cached)
    metafile = cached.joinpath('_meta.json')
    if not metafile.exists():
        raise ValueError("Cannot read from broken cache: %r" % metafile)
    meta = json.loads(metafile.read_text())
    if meta['type'] != 'file':
        raise ValueError("Cannot read a gallery as single file: %r" % cached)
    file = cached.joinpath(meta['disk'])
    if not file.exists():
        raise ValueError("Cannot locate missing file: %r" % file)
    return file.read_bytes()


def uncache_download(link):
    """Drop the cache entry for ``link``, if any."""
    target = get_path_for_caching(link)
    if target.exists():
        shutil.rmtree(target)


def replicate_from_cache(download_target, link):
    """Rebuild a download directory from the cache via symlinks.

    Returns True on success, False if ``link`` is not cached. Symlink
    targets are prefixed with '../../', so ``download_target`` must sit
    exactly two levels below the directory containing ``i_c/``.
    """
    link = get_normalized_link(link)
    download_path = Path(download_target)
    target = get_path_for_caching(link)
    target_meta = target.joinpath('_meta.json')
    if not target_meta.exists():
        return False
    target_metajson = json.loads(target_meta.read_text())
    if target_metajson['type'] == 'file':
        download_path.mkdir(parents=True, exist_ok=True)
        ext = target_metajson['ext']
        ffl = f"0000.{ext}"
        target_file = target.joinpath(target_metajson['disk'])
        sl = download_path.joinpath(ffl)
        # exists() is False for a broken symlink, so check is_symlink() too.
        if sl.exists() or sl.is_symlink():
            sl.unlink()
        sl.symlink_to(f'../../{target_file}')
        download_path.joinpath('meta.json').write_text(json.dumps([{
            'dname': ffl,
            'ext': ext,
            'link': link,
        }], sort_keys=True, indent=2))
    elif target_metajson['type'] == 'album':
        download_path.mkdir(parents=True, exist_ok=True)
        files = list()
        for i, lnk in enumerate(target_metajson['link']):
            # Each album member points at its own single-file cache entry.
            fltarget = get_path_for_caching(lnk)
            fltarget_meta = fltarget.joinpath('_meta.json')
            fltarget_metajson = json.loads(fltarget_meta.read_text())
            ext = fltarget_metajson['ext']
            ffl = '%04d.%s' % (i, ext)
            fltarget_file = fltarget.joinpath(fltarget_metajson['disk'])
            sl = download_path.joinpath(ffl)
            if sl.exists() or sl.is_symlink():
                sl.unlink()
            sl.symlink_to(f'../../{fltarget_file}')
            files.append({
                'dname': ffl,
                'ext': ext,
                'link': lnk,
            })
        download_path.joinpath('meta.json').write_text(json.dumps(files, sort_keys=True, indent=2))
    else:
        raise ValueError("type field cannot be %r" % target_metajson['type'])
    return True
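# For reference, a sketch of the ``_meta.json`` descriptors the functions
# above consume and the functions below produce (hosts and file names here
# are illustrative, not from a real run):
#
#   single file, i_c/i.example.com/abcd1234.jpg/_meta.json:
#     {"disk": "file.jpg", "ext": "jpg",
#      "link": "i.example.com/abcd1234.jpg", "type": "file"}
#
#   album, i_c/example.com/gallery/xyz/_meta.json (each member link is in
#   turn cached as its own single-file entry):
#     {"link": ["https://i.example.com/a.jpg",
#               "https://i.example.com/b.jpg"], "type": "album"}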
def fix_cache(download_target, link):
    """Move a finished download into the cache, leaving symlinks behind.

    A single-file download whose recorded link matches ``link`` becomes a
    'file' entry; anything else becomes an 'album' entry whose members are
    cached individually under their own links.
    """
    link = get_normalized_link(link)
    download_path = Path(download_target)
    download_meta = download_path.joinpath('meta.json')
    downloads = json.loads(download_meta.read_text())
    target = get_path_for_caching(link)
    target.mkdir(parents=True, exist_ok=True)
    protocolless_link = link.split('://', 1)[1]
    if len(downloads) == 1 and downloads[0]['link'].split('://', 1)[1] == protocolless_link:
        return [fix_cache_relocate_single_file_from_download(download_path, downloads[0], target)]
    else:
        target_meta = target.joinpath('_meta.json')
        if not target_meta.exists():
            target_meta.write_text(json.dumps({
                'type': 'album',
                'link': [download['link'] for download in downloads],
            }, sort_keys=True, indent=2))
        return [
            fix_cache_relocate_single_file_from_download(
                download_path,
                download,
                get_path_for_caching(download['link']),
            )
            for download in downloads
        ]


def fix_cache_relocate_single_file_from_download(download_path, download, target):
    """Copy one downloaded file into ``target`` and replace it with a symlink."""
    thumbs_dict = dict()
    target = limit_filename_lenght(target)
    target_meta = target.joinpath('_meta.json')
    ext = download['ext']
    target_file = target.joinpath(f"file.{ext}")
    target_hashfile = target.joinpath(f"file.{ext}.sha256")
    downloaded_file = download_path.joinpath(download['dname'])
    if not downloaded_file.is_symlink():
        # Not cached yet: write the descriptor, copy the bytes into the
        # cache, and swap the original file for a symlink pointing at it.
        target_meta.parent.mkdir(parents=True, exist_ok=True)
        target_meta.write_text(json.dumps({
            'type': 'file',
            'link': download['link'].split('://', 1)[1],
            'ext': ext,
            'disk': target_file.name,
        }, sort_keys=True, indent=2))
        target_file.write_bytes(downloaded_file.read_bytes())
        if target_hashfile.exists():
            target_hashfile.unlink()
        downloaded_file.unlink()
        downloaded_file.symlink_to(f'../../{target_file}')
    if not target_file.exists():
        shutil.rmtree(target)  # cache is invalid; remove it
        for fl in download_path.glob('*'):
            if fl.is_symlink():  # download has a broken symlink into cache
                shutil.rmtree(download_path)
                break
        raise Exception("Specified cached file does not exist.\n"
                        f"Download path: {download_path!r}\n"
                        f"Target: {target!r}")
    if not target_hashfile.exists():
        # Record a SHA-256 checksum alongside the cached file.
        m = hashlib.sha256()
        m.update(target_file.read_bytes())
        target_hashfile.write_text(m.hexdigest())
    thumbs_dict['file'] = str(target_file)
    return thumbs_dict
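
# Minimal usage sketch. Assumptions (not part of this module): a downloader
# has already produced 'dl/example/meta.json' plus the files it lists, and
# the download directory sits exactly two levels deep so the '../../'
# symlinks resolve; the link and paths below are hypothetical.
if __name__ == '__main__':
    example_link = 'https://i.example.com/abcd1234.jpg'  # hypothetical
    fix_cache('dl/example', example_link)             # move bytes into i_c/
    replicate_from_cache('dl/replica', example_link)  # rebuild from cache

    cached = get_path_for_caching(example_link)
    if has_file_cache(cached):
        print('%s: %d bytes cached' % (example_link, len(read_file_from_cache(cached))))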