2020-05-13 21:07:05 +00:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
# -*- encoding: utf-8 -*-
|
|
|
|
|
|
|
|
from pathlib import Path
|
|
|
|
import json
|
|
|
|
import hashlib
|
|
|
|
import shutil
|
|
|
|
|
|
|
|
def get_normalized_link(link: str) -> str:
    """Return *link* as an absolute URL.

    Site-relative reddit paths (``/r/...`` subreddit links and
    ``/user/...`` profile links) are prefixed with
    ``https://www.reddit.com``; every other link is returned unchanged.
    """
    # Both relative forms resolve against the same host, so one
    # startswith() call with a tuple of prefixes replaces the two
    # duplicated if-blocks of the original.
    if link.startswith(('/r/', '/user/')):
        link = 'https://www.reddit.com' + link
    return link
|
|
|
|
|
|
|
|
|
|
|
|
def limit_filename_lenght(target: Path) -> Path:
    """Clamp every component of *target* to 255 characters.

    Most filesystems reject path components longer than 255 bytes;
    truncating each part keeps the path creatable on disk.
    (Name keeps the historical spelling used by callers.)
    """
    clipped = tuple(component[:255] for component in target.parts)
    return Path(*clipped)
|
|
|
|
|
|
|
|
|
|
|
|
def get_domain(link: str) -> str:
    """Return the host component of *link*.

    Derived from the cache path: the first component after the ``i_c``
    cache root is the link's domain.
    """
    cache_path = get_path_for_caching(link)
    return cache_path.parts[1]
|
|
|
|
|
|
|
|
|
|
|
|
def get_path_for_caching(link: str) -> Path:
    """Map *link* to its on-disk cache directory under ``i_c/``.

    The URL scheme is stripped and the remainder of the link becomes
    the directory hierarchy; every component is clamped to a
    filesystem-safe length.
    """
    normalized = get_normalized_link(link)
    without_scheme = normalized.split('://', 1)[1]
    raw_path = Path('i_c') / without_scheme
    return limit_filename_lenght(raw_path)
|
|
|
|
|
2020-06-01 03:20:23 +00:00
|
|
|
def has_file_cache(cached: Path) -> bool:
    """Return True when *cached* holds a complete single-file cache entry.

    A valid entry is a directory containing a ``_meta.json`` whose
    ``type`` is ``'file'`` and whose ``disk`` field names an existing
    payload file inside that directory.
    """
    if not cached.exists():
        return False
    metafile = cached.joinpath('_meta.json')
    if not metafile.exists():
        return False
    # Fix: reuse the already-built metafile path instead of re-joining
    # '_meta.json' a second time (the original rebuilt the same path).
    meta = json.loads(metafile.read_text())
    if meta['type'] != 'file':
        return False
    return cached.joinpath(meta['disk']).exists()
|
|
|
|
|
2020-05-13 21:07:05 +00:00
|
|
|
|
|
|
|
def read_file_from_cache(cached: Path) -> bytes:
    """Return the payload bytes of the single-file cache entry *cached*.

    Raises:
        ValueError: if the entry directory, its ``_meta.json``, or the
            payload file is missing, or if the entry is an album rather
            than a single file.
    """
    if not cached.exists():
        raise ValueError("Cannot read from non-existing cache: %r" % cached)
    metafile = cached.joinpath('_meta.json')
    if not metafile.exists():
        raise ValueError("Cannot read from broken cache: %r" % metafile)
    # Fix: reuse the already-built metafile path instead of re-joining
    # '_meta.json' a second time (the original rebuilt the same path).
    meta = json.loads(metafile.read_text())
    if meta['type'] != 'file':
        raise ValueError("Cannot read a gallery as single file: %r" % cached)
    file = cached.joinpath(meta['disk'])
    if not file.exists():
        raise ValueError("Cannot locate missing file: %r" % file)
    return file.read_bytes()
|
|
|
|
|
|
|
|
|
|
|
|
def uncache_download(link):
    """Delete the cache directory for *link*, if one exists."""
    cache_dir = get_path_for_caching(link)
    if cache_dir.exists():
        shutil.rmtree(cache_dir)
|
|
|
|
|
|
|
|
|
|
|
|
def _symlink_cached_file(download_path, cache_file, dname):
    """Create (or refresh) a relative symlink *dname* inside
    *download_path* pointing at *cache_file* in the cache tree."""
    sl = download_path.joinpath(dname)
    if sl.exists():
        sl.unlink()
    # Download dirs appear to live two levels below the repo root,
    # hence the fixed '../../' prefix — TODO confirm against callers.
    sl.symlink_to(f'../../{str(cache_file)}')


def replicate_from_cache(download_target, link):
    """Rebuild the download directory for *link* from the cache.

    Creates *download_target* with symlinks into the ``i_c`` cache tree
    and a ``meta.json`` describing each file.

    Returns:
        True when the link was cached and replicated, False when no
        cache entry exists for it.

    Raises:
        ValueError: if the cache entry's ``type`` field is neither
            ``'file'`` nor ``'album'``.
    """
    link = get_normalized_link(link)
    download_path = Path(download_target)
    target = get_path_for_caching(link)
    target_meta = target.joinpath('_meta.json')
    if not target_meta.exists():
        return False
    target_metajson = json.loads(target_meta.read_text())
    if target_metajson['type'] == 'file':
        download_path.mkdir(parents=True, exist_ok=True)
        ext = target_metajson['ext']
        ffl = f"0000.{ext}"
        target_file = target.joinpath(target_metajson['disk'])
        _symlink_cached_file(download_path, target_file, ffl)
        download_path.joinpath('meta.json').write_text(json.dumps([{
            'dname': ffl,
            'ext': ext,
            'link': link,
        }], sort_keys=True, indent=2))
    elif target_metajson['type'] == 'album':
        download_path.mkdir(parents=True, exist_ok=True)
        files = []
        for i, lnk in enumerate(target_metajson['link']):
            # Consistency fix: call the shared limit_filename_lenght()
            # helper instead of the original's inline copy of its logic.
            fltarget = limit_filename_lenght(
                Path('i_c').joinpath(lnk.split('://', 1)[1]))
            fltarget_metajson = json.loads(
                fltarget.joinpath('_meta.json').read_text())
            ext = fltarget_metajson['ext']
            ffl = '%04d.%s' % (i, ext)
            fltarget_file = fltarget.joinpath(fltarget_metajson['disk'])
            _symlink_cached_file(download_path, fltarget_file, ffl)
            files.append({
                'dname': ffl,
                'ext': ext,
                'link': lnk,
            })
        download_path.joinpath('meta.json').write_text(
            json.dumps(files, sort_keys=True, indent=2))
    else:
        raise ValueError("type field cannot be %r" % target_metajson['type'])
    return True
|
|
|
|
|
|
|
|
|
|
|
|
def fix_cache(download_target, link):
    """Relocate the downloaded file(s) for *link* into the cache tree.

    Reads the download directory's ``meta.json``, moves each file into
    the ``i_c`` cache (leaving symlinks behind), and writes an album
    ``_meta.json`` when the download holds several files.

    Returns a list of thumbs-dicts, one per relocated file.
    """
    link = get_normalized_link(link)
    download_path = Path(download_target)
    downloads = json.loads(download_path.joinpath('meta.json').read_text())
    target = get_path_for_caching(link)
    target.mkdir(parents=True, exist_ok=True)
    protocolless_link = link.split('://', 1)[1]

    is_single_file = (
        len(downloads) == 1
        and downloads[0]['link'].split('://', 1)[1] == protocolless_link
    )
    if is_single_file:
        # A lone download whose link matches the page link: cache it
        # directly as a 'file' entry.
        relocated = fix_cache_relocate_single_file_from_download(
            download_path, downloads[0], target)
        return [relocated]

    # Multiple files (or a differing link): record an 'album' entry that
    # points at one per-file cache entry per download.
    target_meta = target.joinpath('_meta.json')
    if not target_meta.exists():
        target_meta.write_text(json.dumps({
            'type': 'album',
            'link': [dl['link'] for dl in downloads],
        }, sort_keys=True, indent=2))
    results = []
    for dl in downloads:
        per_file_target = Path('i_c').joinpath(dl['link'].split('://', 1)[1])
        results.append(fix_cache_relocate_single_file_from_download(
            download_path, dl, per_file_target))
    return results
|
|
|
|
|
|
|
|
|
|
|
|
def fix_cache_relocate_single_file_from_download(download_path, download, target):
    """Move one downloaded file into the cache at *target* and replace
    the original download with a symlink into the cache.

    Args (shapes inferred from this file's callers — TODO confirm):
        download_path: Path of the download directory holding the file.
        download: dict with 'dname', 'ext' and 'link' keys (one entry of
            the download dir's meta.json).
        target: cache directory for this file (created if needed).

    Returns:
        dict with key 'file' mapping to the cached file's path string.

    Raises:
        Exception: if, after relocation, the cached file still does not
            exist; the invalid cache entry (and any download dir holding
            broken symlinks into it) is removed first.
    """
    thumbs_dict = dict()
    target = limit_filename_lenght(target)
    target_meta = target.joinpath('_meta.json')
    ext = download['ext']
    target_file = target.joinpath(f"file.{ext}",)
    target_hashfile = target.joinpath(f"file.{ext}.sha256",)
    downloaded_file = download_path.joinpath(download['dname'])
    # A symlinked download was already relocated on a previous run; only
    # copy real files into the cache.
    if not downloaded_file.is_symlink():
        target_meta.parent.mkdir(parents=True, exist_ok=True)
        target_meta.write_text(json.dumps({
            'type': 'file',
            'link': download['link'].split('://', 1)[1],
            'ext': ext,
            'disk': target_file.name,
        }, sort_keys=True, indent=2))
        target_file.write_bytes(downloaded_file.read_bytes())
        # The payload just changed, so any previous hash sidecar is stale.
        if target_hashfile.exists():
            target_hashfile.unlink()
        # Replace the download with a relative symlink into the cache.
        downloaded_file.unlink()
        downloaded_file.symlink_to(f'../../{str(target_file)}')
    if not target_file.exists():
        shutil.rmtree(target) # cache is invalid; remove it
        for fl in download_path.glob('*'):
            if fl.is_symlink(): # download has a broken symlink into cache
                shutil.rmtree(download_path)
                break
        raise Exception("Specified cached file does not exist.\n" +
                        f"Download path: {repr(download_path)}\n" +
                        f"Target: {repr(target)}")
    # Compute the sha256 sidecar lazily, only when missing.
    if not target_hashfile.exists():
        m = hashlib.sha256()
        m.update(target_file.read_bytes())
        target_hashfile.write_text(m.hexdigest())
    thumbs_dict['file'] = str(target_file)
    return thumbs_dict
|