reddit-image-wall-getter/reddit_imgs/download_pruner.py

177 lines
6.5 KiB
Python

#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
import json
from pathlib import Path
from .system.cmdline_parser import parse_cmdline
from typing import Tuple, List, FrozenSet
import colored as clrlib
import multiprocessing
def cmdline(encoded_args: str = None):
    """Command-line entry point for the pruner.

    Args:
        encoded_args: Encoded argument string. When it is ``None`` the pruner
            is run directly; otherwise the string is handed to
            ``parse_cmdline`` together with ``run_with_config``.

    Returns:
        Whatever ``run_with_config`` or ``parse_cmdline`` returns.
    """
    if encoded_args is not None:
        return parse_cmdline(run_with_config, encoded_args)
    return run_with_config()
def run_with_config():
    """Run the pruner; accepts no options and returns nothing."""
    main()
def flatten(list_of_lists):
    """Concatenate the items of every inner iterable into one flat list.

    Only one level of nesting is removed; the order of items is preserved.
    """
    flattened = []
    for inner in list_of_lists:
        flattened.extend(inner)
    return flattened
class FileDeleter(multiprocessing.Process):
    """Worker process that deletes every path it receives on a queue.

    The queue is expected to carry ``pathlib.Path`` objects. A ``None`` item
    is the poison pill that makes the worker stop.
    """

    def __init__(self,
                 file_queue: multiprocessing.Queue):
        """Store the queue the worker will consume.

        Args:
            file_queue: Queue of paths to delete, terminated by ``None``.
        """
        super().__init__()
        self.file_queue = file_queue

    def run(self):
        """Delete queued files until the poison pill arrives.

        A failed deletion is reported and skipped instead of raised: if this
        worker died, nothing would drain the queue any more and the producer
        could block while flushing it.
        """
        proc_name = self.name
        while True:
            next_file: Path = self.file_queue.get()
            if next_file is None:
                # Poison pill means shutdown
                print(f'{proc_name}: Exiting')
                break
            try:
                next_file.unlink()
            except FileNotFoundError:
                # Already gone, so the goal is met; keep consuming.
                print(f'{proc_name}: Already gone: {next_file}')
                continue
            except OSError as error:
                print(clrlib.stylize(
                    f'Failed to delete at {proc_name}: {next_file} ({error})',
                    [clrlib.fg('red')]))
                continue
            # Report success only after the file is really removed.
            print(clrlib.stylize(f'Deleted at {proc_name}: {next_file}', [clrlib.fg('light_magenta')]))
        return
class FileDeletionChecker(multiprocessing.Process):
    """Worker process that decides which candidate files must be deleted.

    File names are read from ``check_queue``. Each name that is not a member
    of ``allowed_files`` is wrapped in a ``Path`` and pushed onto
    ``deletion_queue``. A ``None`` item ends the worker and is forwarded to
    ``deletion_queue``.
    """

    def __init__(self,
                 allowed_files: FrozenSet[str],
                 check_queue: multiprocessing.Queue,
                 deletion_queue: multiprocessing.Queue):
        """Remember the keep-set and the input and output queues."""
        super().__init__()
        self.allowed_files = allowed_files
        self.check_queue = check_queue
        self.deletion_queue = deletion_queue

    def run(self):
        """Filter candidates until the poison pill is received."""
        worker_name = self.name
        while True:
            candidate: str = self.check_queue.get()
            if candidate is None:
                break
            if candidate in self.allowed_files:
                # Kept files are intentionally not reported.
                continue
            report = f'Segregating for deletion at {worker_name}: {candidate}'
            print(clrlib.stylize(report, [clrlib.fg('red')]))
            self.deletion_queue.put(Path(candidate))
        # Poison pill received: announce shutdown and pass the pill along.
        print(f'{worker_name}: Exiting')
        self.deletion_queue.put(None)
def main():
    """Prune ``i_gdl`` down to the files listed in ``i_gdl_ffl.json``.

    Steps:
      1. Load ``i_gdl_ffl.json`` and flatten its values into a keep-set.
         Each listed path is kept, along with a companion path that has
         ``.json`` appended, and ``i_gdl/.cookies`` is always kept.
      2. Walk ``i_gdl`` and feed every file to a ``FileDeletionChecker``
         process, which forwards unlisted files to a ``FileDeleter`` process.
      3. Remove the directories under ``i_gdl`` that are left empty.

    The worker processes are always sent their poison pill and joined, even
    when the walk fails; otherwise they would block on their queues forever.
    """
    print(clrlib.stylize('Loading file list...', [clrlib.fg('light_cyan'), clrlib.attr('bold'), ]))
    # Decode explicitly as UTF-8 instead of relying on the locale default.
    file_list_json_text = Path('i_gdl_ffl.json').read_text(encoding='utf-8')
    print(clrlib.stylize('Parsing file list...', [clrlib.fg('light_cyan'), ]))
    file_list_json_obj = json.loads(file_list_json_text)
    print(clrlib.stylize('Processing file list...', [clrlib.fg('light_cyan'), ]))
    # Paths go through ``Path`` so their spelling matches what the directory
    # walk produces with ``str(path)``.
    keep = set()
    for listed in flatten(file_list_json_obj.values()):
        kept_path = Path(listed)
        keep.add(str(kept_path))
        keep.add(str(Path(str(kept_path) + '.json')))
    keep.add(str(Path('i_gdl/.cookies')))
    files_frozenset: FrozenSet[str] = frozenset(keep)
    print(clrlib.stylize('Starting to prune files from filesystem...', [clrlib.fg('light_cyan'), clrlib.attr('bold'), ]))
    file_checking_for_deletion_queue = multiprocessing.Queue()
    file_deletion_queue = multiprocessing.Queue()
    file_deleter = FileDeleter(file_deletion_queue)
    file_deletion_checker = FileDeletionChecker(
        files_frozenset, file_checking_for_deletion_queue, file_deletion_queue)
    file_deleter.start()
    file_deletion_checker.start()
    try:
        recursive_file_prunning(Path('i_gdl'), file_checking_for_deletion_queue)
    finally:
        # Always shut the pipeline down: the checker forwards this pill to
        # the deleter, so one pill stops both workers.
        file_checking_for_deletion_queue.put(None)
        file_deleter.join()
        file_deletion_checker.join()
        file_checking_for_deletion_queue.close()
        file_checking_for_deletion_queue.join_thread()
        file_deletion_queue.close()
        file_deletion_queue.join_thread()
    print(clrlib.stylize('Starting to prune directories from filesystem...', [clrlib.fg('light_cyan'), clrlib.attr('bold'), ]))
    recursive_empty_dir_prunning(Path('i_gdl'))
def recursive_file_prunning(start_path: Path, queue: multiprocessing.Queue):
    """Walk ``start_path`` recursively and enqueue every file found.

    The files of a directory are put on ``queue`` (as ``str``) in sorted
    order before its sub-directories are visited, also in sorted order.
    Entries that are neither a file nor a directory are ignored.

    Args:
        start_path: Directory to scan.
        queue: Queue that receives the path of each file as a string.
    """
    print(clrlib.stylize(f'Checking directory: {start_path} ...', [clrlib.attr('dim'), ]),
          end='\r', flush=True)
    files = []
    sub_dirs = []
    for e, subpath in enumerate(start_path.iterdir()):
        if subpath.is_file():
            files.append(subpath)
        elif subpath.is_dir():
            sub_dirs.append(subpath)
        if (e % 10) == 0:
            # Refresh the in-place progress line every tenth entry.
            print(clrlib.stylize(
                f'Checking directory: {start_path} ... {len(files)} files + {len(sub_dirs)} folders',
                [clrlib.attr('dim'), ]), end='\r', flush=True)
    files.sort()
    sub_dirs.sort()
    print(clrlib.stylize(
        f'Checking directory: {start_path} ... {len(files)} files + {len(sub_dirs)} folders',
        [clrlib.attr('dim'), ]))
    for file in files:
        queue.put(str(file))
    for subdir in sub_dirs:
        recursive_file_prunning(subdir, queue)
def recursive_empty_dir_prunning(start_path: Path):
    """Remove empty directories below and including ``start_path``.

    Sub-directories are processed first, in sorted order, so a directory
    whose only contents were empty directories is removed too.

    Args:
        start_path: Directory to scan; it is removed if it ends up empty.
    """
    print(clrlib.stylize(f'Checking directory: {start_path} ...', [clrlib.attr('dim'), ]),
          end='\r', flush=True)
    sub_dirs = []
    files = []
    for e, subpath in enumerate(start_path.iterdir()):
        if subpath.is_dir():
            sub_dirs.append(subpath)
        elif subpath.is_file():
            files.append(subpath)
        if (e % 10) == 0:
            # Refresh the in-place progress line every tenth entry.
            print(clrlib.stylize(
                f'Checking directory: {start_path} ... {len(files)} files + {len(sub_dirs)} folders',
                [clrlib.attr('dim'), ]), end='\r', flush=True)
    sub_dirs.sort()
    print(clrlib.stylize(
        f'Checking directory: {start_path} ... {len(files)} files + {len(sub_dirs)} folders',
        [clrlib.attr('dim'), ]))
    for subdir in sub_dirs:
        recursive_empty_dir_prunning(subdir)
    # Look again rather than reuse the counts above: the recursion may have
    # just removed children, and the counts skip entries that are neither
    # file nor directory.
    if next(start_path.iterdir(), None) is None:
        print(clrlib.stylize(
            f'Removing folder: {start_path}', [
                clrlib.fg('red'),
                clrlib.attr('bold'),
            ]
        ))
        start_path.rmdir()
# Script entry point: run the pruner when this file is executed as the main module.
# NOTE(review): the file uses a package-relative import near its top, so it presumably
# has to be launched as a module of its package (``python -m ...``) — confirm.
if __name__ == '__main__':
    main()