#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Check which gallery-dl extractors match the given links, report as JSON."""
import argparse
import json
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional, Type, TypeVar

import gallery_dl
import gallery_dl.extractor
import gallery_dl.path

# Prefer PathFormatOriginal when the installed gallery-dl version still
# provides it; otherwise fall back to the current PathFormat class.
PathFormatGalleryDl: Type[gallery_dl.path.PathFormat] = getattr(
    gallery_dl.path, 'PathFormatOriginal', gallery_dl.path.PathFormat)

VT = TypeVar('VT')


class NoneError(ValueError):
    """Raised when a value that must not be None turns out to be None."""

    @classmethod
    def check(cls, value: Optional[VT], default: Optional[VT] = None) -> VT:
        """Return value unless it is None; then return default or raise."""
        if value is None:
            if default is None:
                raise cls('None value found during check')
            return default
        return value


class Objectify:
    """Expose keyword arguments as object attributes."""

    def __init__(self, **kwargs):
        super().__init__()
        self.__dict__.update(kwargs)


def get_normalized_link(link: str) -> str:
    """Expand protocol-relative and Reddit-relative links to absolute URLs."""
    if link.startswith('//'):
        link = 'http:' + link
    if link.startswith(('/r/', '/user/', '/u/')):
        link = 'https://www.reddit.com' + link
    return link


def limit_filename_length(target: Path) -> Path:
    """Truncate each path component to 255 characters (a common NAME_MAX)."""
    return Path(*[part[:255] for part in target.parts])


def limit_path_max(target: Path) -> Path:
    """Truncate the path so its absolute form stays within 4000 bytes."""
    excess_bytes = len(str(target.absolute()).encode()) - 4000
    if excess_bytes > 0:
        target = Path(
            str(target).encode()[:-excess_bytes].decode(errors='ignore'))
    return target


def get_path_for_caching(link: str, prefix: Path = Path('')) -> Path:
    """Map a link to a filesystem path usable as a cache location."""
    link = get_normalized_link(link)
    target = prefix.joinpath(link.split('://', 1)[-1])
    return limit_path_max(limit_filename_length(target))


def parse_args(args: List[str]) -> argparse.Namespace:
    parser = argparse.ArgumentParser(
        description='Uses gallery-dl to check extractors for links.')
    parser.add_argument('links', type=str, nargs='+',
                        help='The links to match against the extractors')
    return parser.parse_args(args)


def get_link_info(link: str) -> Dict[str, Optional[str]]:
    """Return extractor metadata and cache paths for a single link."""
    extractor = NoneError.check(
        gallery_dl.extractor.find(link),
        Objectify(url=None, category=None, subcategory=None))
    return dict(
        url_requested=link,
        url_extracted=extractor.url,
        url_requested_disk=str(get_path_for_caching(link)),
        url_extracted_disk=None if extractor.url is None else str(
            get_path_for_caching(extractor.url)),
        category=extractor.category,
        subcategory=extractor.subcategory,
    )


def main(args: Optional[List[str]] = None) -> int:
    parsed = parse_args(sys.argv[1:] if args is None else args)
    sys.stdout.write(json.dumps(
        list(map(get_link_info, parsed.links)), indent=4))
    return 0


if __name__ == "__main__":
    kwargs: Dict[str, Any] = dict()
    sys.exit(main(**kwargs))
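
# Illustrative sanity checks for the helpers above (doctest-style sketch, not
# executed; the untruncated second result assumes the working directory keeps
# the absolute path well under the 4000-byte budget used by limit_path_max):
#
#   >>> get_normalized_link('/r/pics')
#   'https://www.reddit.com/r/pics'
#   >>> str(get_path_for_caching('https://example.com/gallery/1'))
#   'example.com/gallery/1'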