reddit-image-wall-getter/reddit_imgs/_normalizetobmp.py

163 lines
5.9 KiB
Python

#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
import json
import subprocess
from concurrent.futures import ProcessPoolExecutor as PoolExecutor
from pathlib import Path
from typing import Dict, Union
import hashlib
import sys
from io import BytesIO
import PIL.Image
import reddit_imgs.hashit
from .hashit import hash2pathsegments
from .system.table_fmt import table_fmt
# Number of worker processes requested from the process pool that main()
# uses to run normalize_to_bmp() jobs.
POOL_SIZE = 32
def hexhashof(bts: bytes, using) -> str:
    """Return the hexadecimal digest of ``bts``.

    ``using`` is called with no arguments and must return an object that
    offers ``update()`` and ``hexdigest()`` -- for example ``hashlib.sha256``.
    """
    hasher = using()
    hasher.update(bts)
    return hasher.hexdigest()
def main():
    """Convert every known image hash to a BMP using a process pool.

    Steps, in order:

    1. Ask the user to confirm (the run needs a lot of disk space).
    2. Load ``i_he.json``; its keys are used as hashes and its values as
       extensions. If the file is missing, ``reddit_imgs.hashit.main()``
       is run first.
    3. Print a table with the number of hashes per extension.
    4. Submit one ``normalize_to_bmp`` job per hash, reading inputs from
       ``i_h/<segments>/sample`` and writing into ``i_h_n/<segments>/``.
    5. Collect each job's result into ``i_h_n.json``, saved after every
       1000 finished jobs and once more when the pool has shut down.
    """
    print("Caution: you'll need a lot of disk space.")
    input("Press ENTER to continue. ")

    hashes_json = Path('i_he.json')
    if not hashes_json.exists():
        print("Executing prerrequisite...")
        reddit_imgs.hashit.main()
    ext_by_hash = json.loads(hashes_json.read_text())

    # Count how many hashes use each extension.
    ext_counts = {}
    for extension in ext_by_hash.values():
        ext_counts[extension] = ext_counts.get(extension, 0) + 1

    source_root = Path('i_h')
    output_root = Path('i_h_n')
    output_root.mkdir(parents=True, exist_ok=True)

    # Rarest extension first, ties broken by name; the grand total goes last.
    rows = sorted(ext_counts.items(), key=lambda pair: (pair[1], pair[0]))
    rows.append(('total', sum(ext_counts.values())))
    print(table_fmt(
        ['ext', 'hashes'],
        rows,
        'Extensions',
        alignment='^>',
        divide_last_line=True,
    ))

    # Results of previous runs are reused so finished hashes can be skipped
    # by normalize_to_bmp().
    state_json = Path('i_h_n.json')
    converted = dict()
    if state_json.exists():
        converted = json.loads(state_json.read_text())

    def save_state():
        state_json.write_text(json.dumps(
            converted,
            indent=1,
        ))

    with PoolExecutor(POOL_SIZE) as pool:
        total = len(ext_by_hash)
        finished = 0

        def on_finished(job):
            # NOTE: job.result() re-raises whatever the worker raised. Per the
            # concurrent.futures documentation an exception escaping a done
            # callback is logged and ignored, so a failed hash is counted
            # but not recorded in ``converted``.
            nonlocal finished
            finished += 1
            outcome = job.result()
            converted[outcome['hash']] = outcome
            if finished % 1000 == 0:
                save_state()

        for seq, (hsh, ext) in enumerate(ext_by_hash.items()):
            sample_path = source_root.joinpath(
                *hash2pathsegments(hsh),
                'sample'
            )
            target_dir = output_root.joinpath(
                *hash2pathsegments(hsh),
            )
            previous = converted.get(hsh, dict(hash=hsh))
            pool.submit(
                normalize_to_bmp,
                seq,
                total,
                sample_path,
                target_dir,
                ext,
                hsh,
                previous,
            ).add_done_callback(on_finished)

    # Leaving the ``with`` block waits for all jobs, so this is the final state.
    save_state()
def normalize_to_bmp(seq: int,
                     total: int,
                     pathinfile: Path,
                     pathoutdir: Path,
                     knownext: str,
                     hsh: str,
                     lastout: Dict[str, Union[str, int]],
                     ) -> Dict[str, Union[str, int]]:
    """Ensure ``pathoutdir/aspect.bmp`` exists and record its metadata.

    If ``lastout`` already holds ``aspect``, ``aspect_size`` and
    ``aspect_dimens`` nothing is done. Otherwise the BMP is generated when
    it is missing (``ffmpegthumbnailer`` renders a PNG to stdout, Pillow
    re-encodes it as BMP), and then the BMP is read back to fill in:

    * ``aspect``        -- SHA-256 hex digest of the BMP bytes (also written
      to ``aspect.bmp.sha256`` next to it);
    * ``aspect_size``   -- size of the BMP in bytes;
    * ``aspect_dimens`` -- the image size as reported by Pillow.

    Args:
        seq: Zero-based job index, used only for progress messages.
        total: Total number of jobs, used only for progress messages.
        pathinfile: Source media file.
        pathoutdir: Directory receiving ``aspect.bmp``.
        knownext: Extension of the source, used only in messages.
        hsh: Hash identifying the entry, used only in messages.
        lastout: Previously known metadata; updated in place.

    Returns:
        The same ``lastout`` object, with the three keys above present.

    Raises:
        FileNotFoundError: ``pathinfile`` does not exist.
        ValueError: ``ffmpegthumbnailer`` exited with a non-zero status or
            produced no output.
    """
    # NOTE: a padded-square variant ('square', 'square_size',
    # 'square_dimens' from 'square.bmp') used to be produced here as well;
    # it is currently disabled.
    needed_info = ('aspect', 'aspect_size', 'aspect_dimens')
    if all(key in lastout for key in needed_info):
        return lastout

    progress_of = '%06d of %06d' % (seq+1, total)
    print(f'{progress_of} - Ensuring that "{knownext}" entry exists: {hsh}')
    if not pathinfile.exists():
        raise FileNotFoundError(pathinfile)

    pathoutfile = pathoutdir.joinpath('aspect.bmp')
    if not pathoutfile.exists():
        print(f'{progress_of} - Converting to BMP keeping aspect')
        # Flags as understood from the ffmpegthumbnailer CLI: seek to 10% of
        # the input, keep the original size, encode as PNG, write to stdout.
        r = subprocess.run(
            ['ffmpegthumbnailer',
             '-i', str(pathinfile),
             '-t', '10%',
             '-s', '0',
             '-c', 'png',
             '-o', '-',
             ],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        # A zero exit status with empty output is treated as a failure too,
        # so no separate check_returncode() call is needed afterwards.
        if r.returncode or not r.stdout:
            sys.stdout.write(r.stderr.decode(errors='ignore'))
            raise ValueError('Conversion failed for %r [%s]' % (pathinfile, knownext))
        pathoutdir.mkdir(parents=True, exist_ok=True)
        # Pillow picks the output format from the '.bmp' suffix.
        with PIL.Image.open(BytesIO(r.stdout)) as rendered:
            rendered.save(pathoutfile)

    print(f'{progress_of} - Extacting BMP data from aspect')
    bts = pathoutfile.read_bytes()
    aspectsha256 = hexhashof(bts, hashlib.sha256)
    pathoutdir.joinpath('aspect.bmp.sha256').write_text(aspectsha256)
    lastout['aspect'] = aspectsha256
    lastout['aspect_size'] = len(bts)
    # Close the image file right away instead of leaving the handle to the
    # garbage collector; this runs many times inside a pool worker.
    with PIL.Image.open(pathoutfile) as bitmap:
        lastout['aspect_dimens'] = bitmap.size
    return lastout
# Entry point when the module is executed rather than imported. The module
# uses package-relative imports, so it has to be started as part of its
# package (e.g. via ``python -m``) for this guard to be reached.
if __name__ == "__main__":
    main()