reddit-image-wall-getter/reddit_imgs/normalizetobmp.py

161 lines
5.7 KiB
Python

#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
import json
import subprocess
from concurrent.futures import ProcessPoolExecutor as PoolExecutor
from pathlib import Path
from typing import Dict, Union
import hashlib
import sys
from io import BytesIO
import PIL.Image
import reddit_imgs.hashit
from .hashit import hash2pathsegments
from .system.table_fmt import table_fmt
# Number of worker processes in the pool that main() uses to run
# normalize_to_bmp jobs in parallel.
POOL_SIZE = 32
def hexhashof(bts: bytes, using) -> str:
    """Return the hexadecimal digest of *bts*.

    ``using`` is called with no arguments and must return an object that
    offers ``update(bytes)`` and ``hexdigest()`` (this module passes
    ``hashlib.sha256``).
    """
    hasher = using()
    hasher.update(bts)
    hex_digest = hasher.hexdigest()
    return hex_digest
def main():
    """Normalize every hashed image to BMP and persist the collected metadata.

    Loads the hash -> extension mapping from ``i_he.json`` (running
    ``reddit_imgs.hashit.main()`` first when that file does not exist),
    prints a per-extension summary table, then submits one
    ``normalize_to_bmp`` job per hash to a process pool.  Job results are
    merged into the mapping stored in ``i_h_n.json``; that file is rewritten
    after every 1000 finished jobs and once more when the pool has drained.
    """
    hashes_json = Path('i_he.json')
    if not hashes_json.exists():
        print("Executing prerrequisite...")
        reddit_imgs.hashit.main()
    hash_to_ext = json.loads(hashes_json.read_text())

    # Count how many hashes use each extension.
    ext_counts = {}
    for extension in hash_to_ext.values():
        ext_counts[extension] = ext_counts.get(extension, 0) + 1

    source_root = Path('i_h')
    normalized_root = Path('i_h_n')
    normalized_root.mkdir(parents=True, exist_ok=True)

    # Rows ordered by (count, extension), followed by a grand-total row.
    table_rows = sorted(
        ext_counts.items(),
        key=lambda pair: (pair[1], pair[0]),
    )
    table_rows.append(('total', sum(ext_counts.values())))
    print(table_fmt(
        ['ext', 'hashes'],
        table_rows,
        'Extensions',
        alignment='^>',
        divide_last_line=True,
    ))

    results_json = Path('i_h_n.json')
    if results_json.exists():
        converted = json.loads(results_json.read_text())
    else:
        converted = dict()

    def dump_converted():
        # Persist everything gathered so far.
        results_json.write_text(json.dumps(converted, indent=1))

    total_jobs = len(hash_to_ext)
    finished_jobs = 0

    def on_finished(job):
        # Done-callback: merge the job's result and checkpoint periodically.
        nonlocal finished_jobs
        finished_jobs += 1
        outcome = job.result()
        converted[outcome['hash']] = outcome
        if finished_jobs % 1000 == 0:
            dump_converted()

    with PoolExecutor(POOL_SIZE) as pool:
        for seq, (hsh, ext) in enumerate(hash_to_ext.items()):
            source_file = source_root.joinpath(
                *hash2pathsegments(hsh),
                'sample'
            )
            target_dir = normalized_root.joinpath(*hash2pathsegments(hsh))
            # Reuse the previous result for this hash when there is one.
            previous = converted.get(hsh, dict(hash=hsh))
            future = pool.submit(
                normalize_to_bmp,
                seq,
                total_jobs,
                source_file,
                target_dir,
                ext,
                hsh,
                previous,
            )
            future.add_done_callback(on_finished)

    # Leaving the ``with`` block waits for all jobs; write the final state.
    dump_converted()
def _record_bmp_info(bmp_path: Path,
                     key: str,
                     lastout: Dict[str, Union[str, int]],
                     ) -> None:
    """Store digest, byte size and pixel dimensions of *bmp_path* in *lastout*.

    The SHA-256 hex digest is also written next to the image as
    ``<bmp name>.sha256``.  Fills ``lastout[key]``, ``lastout[key + '_size']``
    and ``lastout[key + '_dimens']``.
    """
    bts = bmp_path.read_bytes()
    digest = hexhashof(bts, hashlib.sha256)
    bmp_path.with_name(bmp_path.name + '.sha256').write_text(digest)
    lastout[key] = digest
    lastout[key + '_size'] = len(bts)
    # Pillow opens images lazily and keeps the underlying file open; the
    # context manager releases the handle as soon as the size has been read.
    with PIL.Image.open(bmp_path) as im:
        lastout[key + '_dimens'] = im.size


def normalize_to_bmp(seq: int,
                     total: int,
                     pathinfile: Path,
                     pathoutdir: Path,
                     knownext: str,
                     hsh: str,
                     lastout: Dict[str, Union[str, int]],
                     ) -> Dict[str, Union[str, int]]:
    """Ensure BMP renditions of one image exist and collect their metadata.

    Produces, inside *pathoutdir*, ``aspect.bmp`` (a frame extracted from
    *pathinfile* by ``ffmpegthumbnailer``) and ``square.bmp`` (the aspect
    image centred on a square RGB canvas whose side is the larger
    dimension), each with a ``.sha256`` sidecar file.  Files that already
    exist are not regenerated, and nothing is done at all when *lastout*
    already holds every metadata key.

    Args:
        seq: Zero-based position of this job, used only for progress output.
        total: Total number of jobs, used only for progress output.
        pathinfile: Source media file.
        pathoutdir: Directory receiving the BMP files (created on demand).
        knownext: Extension associated with the hash; only printed.
        hsh: Hash identifying the image; only printed.
        lastout: Metadata from a previous run.  Updated in place.

    Returns:
        *lastout*, with ``aspect``/``square`` (SHA-256 hex digests),
        ``*_size`` (byte counts) and ``*_dimens`` (``(width, height)``)
        filled in.

    Raises:
        FileNotFoundError: *pathinfile* does not exist.
        ValueError: ``ffmpegthumbnailer`` failed or produced no output.
    """
    needed_info = (
        'square', 'square_size', 'square_dimens',
        'aspect', 'aspect_size', 'aspect_dimens',
    )
    if not set(needed_info).difference(lastout):
        # Everything is already known; avoid touching the filesystem.
        return lastout

    progress_of = '%06d of %06d' % (seq+1, total)
    print(f'{progress_of} - Ensuring that "{knownext}" entry exists: {hsh}')
    if not pathinfile.exists():
        raise FileNotFoundError(pathinfile)

    aspect_path = pathoutdir.joinpath('aspect.bmp')
    if not aspect_path.exists():
        print(f'{progress_of} - Converting to BMP keeping aspect')
        # The thumbnailer is asked for PNG on stdout ('-o -'), which is
        # decoded in memory and re-encoded as BMP below.
        r = subprocess.run(
            ['ffmpegthumbnailer',
             '-i', str(pathinfile),
             '-t', '10%',
             '-s', '0',
             '-c', 'png',
             '-o', '-',
             ],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        if r.returncode or not r.stdout:
            sys.stdout.write(r.stderr.decode(errors='ignore'))
            raise ValueError('Conversion failed for %r' % pathinfile)
        pathoutdir.mkdir(parents=True, exist_ok=True)
        with PIL.Image.open(BytesIO(r.stdout)) as thumb:
            thumb.save(aspect_path)
    if {'aspect', 'aspect_size', 'aspect_dimens'}.difference(lastout):
        print(f'{progress_of} - Extacting BMP data from aspect')
        _record_bmp_info(aspect_path, 'aspect', lastout)

    square_path = pathoutdir.joinpath('square.bmp')
    if not square_path.exists():
        print(f'{progress_of} - Converting to BMP square')
        # Close the source image once it has been pasted onto the canvas.
        with PIL.Image.open(aspect_path) as im:
            ax, ay = im.size
            am = max(ax, ay)
            sq = PIL.Image.new('RGB', (am, am))
            sq.paste(im, ((am-ax)//2, (am-ay)//2))
        sq.save(square_path)
    if {'square', 'square_size', 'square_dimens'}.difference(lastout):
        print(f'{progress_of} - Extacting BMP data from square')
        _record_bmp_info(square_path, 'square', lastout)

    return lastout
# Run the normalization only when this module is executed as a script.
if __name__ == "__main__":
    main()