reddit-image-wall-getter/reddit_imgs/display_fetch_futures.py
#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
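"""Live terminal dashboard for the reddit_imgs fetch workers.

Renders the worker state files (by default under ``i_gdl_w``) as a
full-screen status view: per-worker queues, link/file/byte counters,
download-speed estimates, and an ETA.
"""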
import datetime
import importlib
import shutil
import time
import traceback
from io import StringIO
from pathlib import Path
from time import sleep
from typing import Dict, List, Union

import colored

from .system.format_file_size import format_power2, format_power10

Numeric = Union[int, float]

ESC = '\033'  # ANSI escape introducer (cursor movement, line/screen clears)
LINK_MEMORY_SIZE = 64  # state-file mtimes kept for the global links/s estimate
JOB_BYTES_MEMORY_SIZE = 4  # (mtime, bytes) samples kept per active job
JOB_LINKS_MEMORY_SIZE = 8  # history length for per-job link-rate sampling


def reverse_mapping_list(d: dict) -> dict:
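    """Invert a mapping, grouping keys that share a value: {v: [k, ...]}."""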
r = {k: list() for k in set(d.values())}
for k, v in d.items():
r[v].append(k)
return r


def reverse_mapping(d: dict) -> dict:
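    """Invert a one-to-one mapping: {v: k}."""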
return {v: k for k, v in d.items()}


def frequency_dict(l: list) -> dict:
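    """Count how many times each element appears in l."""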
f = {e: 0 for e in set(l)}
for e in l:
f[e] += 1
return f


def print_terminal(workers_state_path: Path, keep_to_next_cycle=None):
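    """Render one dashboard frame from the files under workers_state_path.

    Returns a carry-over dict of rolling histories (timestamps, byte counts)
    that the caller should pass back in on the next invocation.
    """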
    # Worker state files are named after the job; auxiliary files contain
    # '=' (e.g. 'job=line') and are skipped here.
    jobs = [state_file.name
            for state_file in workers_state_path.iterdir()
            if '=' not in state_file.name]
if len(jobs) == 0:
print(ESC+'[2J', end='', flush=True)
print(ESC+'[0;0H', end='', flush=True)
print(
f'Waiting for jobs @ {datetime.datetime.now()}', end='', flush=True)
print(ESC+'[K', flush=True)
return
jobs_sequenced_by_guess = sorted(
jobs,
key=lambda a: (
0 if ':' not in a else int(a.split(':', 1)[1]),
a,
)
)
jobs_state_description_content = dict()
jobs_lines = dict()
jobs_queues = dict()
jobs_enqueueds = dict()
jobs_states = dict()
jobs_dates = dict()
jobs_latest_link = dict()
start_date = datetime.datetime.now()
try:
start_date = datetime.datetime.fromtimestamp(
workers_state_path.stat().st_mtime)
jobs_lines = {
job: int(
workers_state_path.joinpath(job+'=line')
.read_text()
)
for job in jobs
if workers_state_path.joinpath(job+'=line').exists()
}
while len(jobs_sequenced_by_guess) > 0:
job, *jobs_sequenced_by_guess = jobs_sequenced_by_guess
if job not in jobs_lines:
jobs_lines[job] = max([-1] + list(jobs_lines.values())) + 1
del job
del jobs_sequenced_by_guess
jobs_state_description_content = {
job: workers_state_path.joinpath(job).read_text()
for job in jobs
}
jobs_queues = {
job: int(
jobs_state_description_content[job]
.split(':')[1]
)
for job in jobs
}
jobs_enqueueds = {
job: int(
jobs_state_description_content[job]
.split(':')[2]
)
for job in jobs
}
jobs_states = {
job: (
jobs_state_description_content[job]
.split(':')[0]
)
for job in jobs
}
jobs_dates = {
job: (
workers_state_path.joinpath(job)
.stat().st_mtime_ns
)
for job in jobs
}
jobs_files = {
job: int(
jobs_state_description_content[job]
.split(':')[4]
)
for job in jobs
}
jobs_bytes = {
job: int(
jobs_state_description_content[job]
.split(':')[3]
)
for job in jobs
}
        # A state file reads 'state:queue:enqueued:bytes:files[:link]';
        # maxsplit=5 keeps any ':' inside the link itself
        # (e.g. 'https://...') intact.
        jobs_latest_link = {
            job: (None
                  if len(parts := jobs_state_description_content[job].split(':', 5)) < 6
                  else parts[5])
            for job in jobs
        }
    except KeyboardInterrupt:
        raise
    except Exception:
        # State files are rewritten concurrently by the workers; a torn
        # read just skips this frame and retries on the next cycle.
        return keep_to_next_cycle
    # Redraw in place (no full-screen clear) to avoid flicker.
    print(ESC+'[0;0H', end='', flush=True)
if keep_to_next_cycle is None:
keep_to_next_cycle = dict()
displayatbottom = ''
jobs_sizes = {job: len(
job +
'@' +
(str(jobs_enqueueds[job])+'/' if jobs_states[job] in ('running', 'scrubbing') else '') +
str(jobs_queues[job])
)
for job in jobs_lines.keys()}
state_sequence = [
('finished',),
('running', 'scrubbing'),
('enqueued',),
('waiting',),
('failed',),
]
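    # Nine fill levels, from blank to full block, for the progress strip.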
block_sequence = ' \u2581\u2582\u2583\u2584\u2585\u2586\u2587\u2588'
state_stats = frequency_dict(list(jobs_states.values()))
links_stats = dict(waiting=0, enqueued=0, scrubbing=0,
running1=0, running2=0, finished=0,
failed=0)
    for job in jobs:
        state = jobs_states[job]
        jq1 = jobs_queues.get(job, 0)
        jq2 = jobs_enqueueds.get(job, 0)
        if state not in ('running', 'scrubbing'):
            if state not in links_stats:
                links_stats[state] = 0
            links_stats[state] += jq1
        else:
            # Split an active job's queue into links still staging (jq2)
            # and links already downloaded (jq1 - jq2).
            links_stats['running1'] += jq2
            links_stats['running2'] += jq1-jq2
term_sz = shutil.get_terminal_size()
jobs_sorted = list(map(lambda a: a[1], sorted(
reverse_mapping(jobs_lines).items())))
jobsdates_list = list(reversed(sorted(jobs_dates.values())))
jobs_daterank = {
job: jobsdates_list.index(date)
for job, date in jobs_dates.items()}
bg_rank_color_names = [
# "grey_93", # 24
# "grey_89", # 23
# "grey_85", # 22
# "grey_82", # 21
# "grey_78", # 20
# "grey_74", # 19
# "grey_70", # 18
# "grey_66", # 17
"grey_62", # 16
"grey_58", # 15
"grey_54", # 14
"grey_50", # 13
"grey_46", # 12
"grey_42", # 11
"grey_39", # 10
"grey_35", # 9
"grey_30", # 8
"grey_27", # 7
"grey_23", # 6
"grey_19", # 5
"grey_15", # 4
"grey_11", # 3
"grey_7", # 2
"grey_3", # 1
]
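    # These greys form a recency gradient: the most recently updated jobs
    # get the lightest background, older ones fade towards black.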
bg_rank = [
colored.bg(clr)
for clr in bg_rank_color_names
]
bg_rank_size = max(
1,
state_stats.get('running', 0) + state_stats.get('scrubbing', 0)
)
bg_rank = bg_rank[-bg_rank_size:]
    bg_rank_programmed_len = bg_rank_size
    bg_rank += [colored.bg('black')] * (len(jobs_dates) - len(bg_rank))
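    # Rolling window of state-file mtimes: whenever any job's file changes,
    # its timestamp joins the window, yielding a global links/s estimate.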
link_processing_timestamps = keep_to_next_cycle.get(
'link_processing_timestamps', list())
for link_processing_timestamp in jobs_dates.values():
if link_processing_timestamp not in link_processing_timestamps:
link_processing_timestamps.append(link_processing_timestamp)
    link_processing_timestamps = sorted(
        link_processing_timestamps, reverse=True,
    )[:max(state_stats.get("running", 0), LINK_MEMORY_SIZE)]
keep_to_next_cycle['link_processing_timestamps'] = link_processing_timestamps
    # Deltas in seconds between consecutive window entries (newest first);
    # the first pair measures 'now' against the most recent update.
    link_processing_deltas = list(map(
        lambda t: (t[0]-t[1])/10**9,
        zip(
            [time.time()*10**9]+link_processing_timestamps,
            link_processing_timestamps+[link_processing_timestamps[-1]]
        )))[:-1]
link_processing_deltas_avg = sum(
link_processing_deltas+[0])/max(1, len(link_processing_deltas))
link_processing_deltas_var = 0
if (l := len(link_processing_deltas)) > 0:
diff = list(map(
lambda lpd: (lpd - link_processing_deltas_avg),
link_processing_deltas
))
diffsqd = list(map(
lambda d: d**2,
diff
))
link_processing_deltas_var = sum(diffsqd)/l
job_links_avg = link_processing_deltas_avg
job_links_var = link_processing_deltas_var
download_pending_count = (
links_stats.get("waiting", 0) +
links_stats.get("enqueued", 0) +
links_stats.get("running1", 0)
)
links_per_sec = 1/max(2**-30, job_links_avg)
seconds_to_finish = download_pending_count*job_links_avg
datetime_when_finished = datetime.datetime.now(
) + datetime.timedelta(seconds=seconds_to_finish)
time_to_finish = '%2dd %2dh %2dm %2ds' % (
seconds_to_finish//(3600*24),
(seconds_to_finish % (3600*24))//3600,
(seconds_to_finish % 3600)//60,
seconds_to_finish % 60,
)
    displayatbottom += f'Speed: {"%6.3f" % links_per_sec} links/s | '
    displayatbottom += 'Speed: ##BYTESPERSEC## | '  # placeholder; filled in below
    displayatbottom += f'ETA: {time_to_finish} | '
    displayatbottom += f'ETL: {datetime_when_finished} | '
    displayatbottom += f'Error: \u00b1{"%6.2f" % (100*(job_links_var**.5)/max(2**-30, job_links_avg))}%'
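    # number1colors styles worker-state labels; number2colors restyles the
    # active states for link counters (staging vs. downloaded).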
number1colors: Dict[str, List[str]] = dict(
waiting=[
colored.fg('light_gray'),
colored.attr('dim'),
],
enqueued=[
colored.fg('light_red'),
],
scrubbing=[
colored.fg('light_cyan')
],
running=[
colored.fg('light_yellow')
],
finished=[
colored.fg('light_green')
],
)
number1colors['failed'] = number1colors['waiting']
number2colors: Dict[str, List[str]] = number1colors.copy()
number2colors['running'] = [colored.fg('light_cyan')]
number2colors['scrubbing'] = [colored.fg('light_magenta')]
number2colors['running1'] = number1colors['running']
number2colors['running2'] = number2colors['running']
workers_text_stats = (
colored.stylize(
f'{state_stats.get("waiting", 0)} waiting',
number1colors['waiting'],
) +
' - ' +
colored.stylize(
f'{state_stats.get("enqueued", 0)} enqueued',
number1colors['enqueued'],
) +
' - ' +
colored.stylize(
f'{state_stats.get("running", 0)} running',
number1colors['running'],
) +
' \u00b7 ' +
colored.stylize(
f'{state_stats.get("scrubbing", 0)} scrubbing',
number1colors['scrubbing'],
) +
' - ' +
colored.stylize(
f'{state_stats.get("finished", 0)} finished',
number1colors['finished'],
)
)
links_text_stats = (
colored.stylize(
f'{links_stats.get("waiting", 0)} w.',
number2colors['waiting'],
) +
' - ' +
colored.stylize(
f'{links_stats.get("enqueued", 0)} e.',
number2colors['enqueued'],
) +
' - ' +
colored.stylize(
f'{links_stats.get("running1", 0)} staging',
number2colors['running1'],
) +
' \u00b7 ' +
colored.stylize(
f'{links_stats.get("running2", 0)} downloaded',
number2colors['running2'],
) +
' - ' +
colored.stylize(
f'{links_stats.get("finished", 0)} f.',
number2colors['finished'],
)
)
text_stats_size = max(map(len, [workers_text_stats, links_text_stats]))
text_stats_fmt = '{0:^%d}' % text_stats_size
workers_text_stats = text_stats_fmt.format(workers_text_stats)
links_text_stats = text_stats_fmt.format(links_text_stats)
dwldcount = links_stats.get("running2", 0) + links_stats.get("finished", 0)
filecount = sum(jobs_files.values())
bytecount = sum(jobs_bytes.values())
worker_ratio = state_stats.get(
"finished", 0)/(max(1, sum(state_stats.values())))
links_ratio = (links_stats.get("running2", 0) +
links_stats.get("finished", 0))/max(1, sum(links_stats.values()))
jobs_bytes_history = keep_to_next_cycle.get(
'jobs_bytes_history', dict())
keep_to_next_cycle['jobs_bytes_history'] = jobs_bytes_history
for job, state in jobs_states.items():
if state in ('running', 'scrubbing'):
if (db := (jobs_dates[job], jobs_bytes[job])) not in (jbh := jobs_bytes_history.get(job, list())):
jbh.append(db)
                jobs_bytes_history[job] = jbh[-JOB_BYTES_MEMORY_SIZE:]
else:
if job in jobs_bytes_history:
del jobs_bytes_history[job]
jobs_speeds_avg = list()
    for job, bytes_history in jobs_bytes_history.items():
        # Pair consecutive (timestamp_ns, bytes) samples and reduce each
        # pair to (seconds_elapsed, bytes_gained).
        job_byte_transitions = list(map(
            lambda b: tuple(map(
                lambda a: a[1] - a[0],
                zip(*tuple(map(
                    lambda a: (a[0]/10**9, a[1]),
                    b
                ))))),
            zip(
                [bytes_history[0]] + bytes_history,
                bytes_history + [bytes_history[-1]],
            )))[1:-1]
        # Bytes per second over every interval where both deltas are nonzero.
        job_speed = list(map(
            lambda a: a[1]/a[0],
            filter(lambda a: a[0] != 0 and a[1] != 0, job_byte_transitions)
        ))
        if len(job_speed) > 0:
            job_speed_avg = sum(job_speed)/len(job_speed)
            jobs_speeds_avg.append(job_speed_avg)
    jobs_speed_avg = sum(jobs_speeds_avg)
displayatbottom = displayatbottom.replace(
"##BYTESPERSEC##",
"%10s/s \u00b7 %10sps" % (
str(format_power2(jobs_speed_avg)),
str(format_power10(jobs_speed_avg*8, suffix='b')),
),
)
print(
f'# Monitoring {len(jobs)} jobs ' +
f'@ {datetime.datetime.now()} ' +
f'@ {datetime.datetime.now() - start_date}' +
'',
end='', flush=True)
print(ESC+'[K', flush=True)
print('Workers: ' +
'%8.4f%% | ' % (100*worker_ratio) +
workers_text_stats +
' | ' +
          'Current: ' +
f'{dwldcount} links, ' +
f'{filecount} files, ' +
f'{format_power10(bytecount)}' +
'',
end='')
print(ESC+'[K', flush=True)
print('Links: ' +
'%8.4f%% | ' % (100*links_ratio) +
links_text_stats +
' | ' +
          'Expected: ' +
'%.3f fl/lnk, ' % (filecount/max(1, dwldcount)) +
f'{int(filecount/max(2**-30, links_ratio))} files, ' +
f'{format_power10(int(bytecount/max(2**-30, links_ratio)))}' +
'',
end='')
print(ESC+'[K', flush=True)
print('Latest updates gradient: ', end='')
    bg_rank_programmed_len_digits = len('%d' % (bg_rank_programmed_len+1))
    for i in range(bg_rank_programmed_len+1):
        if i == bg_rank_programmed_len:
            print(' ', end='')
            print('-'*bg_rank_programmed_len_digits, end='')
        else:
            print(bg_rank[i], end='')
            print(' ', end='')
            print(('%%0%dd' % bg_rank_programmed_len_digits) % (i+1), end='')
            print(' ', end='')
    print(colored.attr('reset'), end='')
print(ESC+'[K', flush=True)
print('Overall progress: ', end='')
for current_job in jobs_sorted:
current_state = jobs_states[current_job]
        number1color = number1colors.get(current_state, list())
        number2color = number2colors.get(current_state, list())
        # Fraction of this job's queue already processed, quantized onto
        # the nine block glyphs (blank through full block).
        current_block_no = ((len(block_sequence)-1)*(
            jobs_queues[current_job]-jobs_enqueueds[current_job]))//max(1, jobs_queues[current_job])
        print(
            colored.stylize(
                block_sequence[current_block_no],
                # Turn each foreground SGR (38;5;n) into the matching
                # background SGR (48;5;n) so the whole cell gets shaded.
                [color.replace("38;5;", "48;5;") for color in number1color] +
                number2color
            ),
            end=''
        )
print(ESC+'[K', flush=True)
for stateelems in state_sequence:
current_jobs = [job
for job in jobs_sorted
if jobs_states[job] in stateelems]
if len(current_jobs) <= 0:
continue
print('» ', end='')
print(' & '.join([
colored.stylize(stateelem, number1colors[stateelem])
for stateelem in stateelems
]),
end=''
)
print(' «', end='')
print(ESC+'[K', flush=True)
        # Size columns to the widest "job@enqueued/queue" label in this group.
        max_job_size = max([0]+[jobs_sizes[job] for job in current_jobs])
        per_column = term_sz.columns//(max_job_size+1)
        current_jobs = sorted(
            current_jobs,
            key=lambda j: (
                # Most-complete jobs first (smallest pending fraction),
                # then original row order.
                jobs_enqueueds[j]/max(0.00000001, jobs_queues[j]),
                jobs_lines[j],)
        )
while len(current_jobs) > 0:
for _ in range(per_column):
if len(current_jobs) > 0:
current_job, *current_jobs = current_jobs
current_state = jobs_states[current_job]
number1color = number1colors.get(current_state, list())
number2color = number2colors.get(current_state, list())
print(''.join(number1color), end='')
print(bg_rank[jobs_daterank[current_job]], end='')
print(' '*(max_job_size-jobs_sizes[current_job]), end='')
print(current_job, end='')
print('@', end='')
print(str(jobs_queues[current_job]), end='')
if current_state in ('running', 'scrubbing'):
print('/', end='')
print(''.join(number2color), end='')
print(str(jobs_enqueueds[current_job]), end='')
print(colored.attr('reset'), end='')
print(' ', end='')
print(ESC+'[K', flush=False)
print(displayatbottom, end=ESC+'[K', flush=True)
print(ESC+'[0J', end='', flush=True)
print(ESC+'[0;0H', end='', flush=True)
return keep_to_next_cycle


def do_cycle_sleep():
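    """Throttle the redraw loop to roughly sixty frames per second."""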
sleep(1/60)


def main():
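    """Redraw loop: watches the worker-state directory until it disappears.

    The module is re-imported and hot-reloaded every cycle so the dashboard
    code can be edited while the workers keep running.
    """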
    selfmodule_path = (Path(__file__)
                       .absolute()
                       .relative_to(Path('.').absolute()))
selfmodule_name = (
str(selfmodule_path.parent).replace('/', '.') +
'.' +
selfmodule_path.stem)
selfmodule_name = (
selfmodule_name[1:]
if selfmodule_name.startswith('.') else
selfmodule_name)
selfmodule = importlib.import_module(selfmodule_name)
workers_state_path = Path('i_gdl_w')
from_exc = False
keep_to_next_cycle = None
print(ESC+'[2J', end='', flush=True)
print(ESC+'[0;0H', end='', flush=True)
while workers_state_path.exists():
try:
selfmodule = importlib.reload(selfmodule)
if from_exc:
from_exc = False
print(ESC+'[2J', end='', flush=True)
print(ESC+'[0;0H', end='', flush=True)
keep_to_next_cycle = selfmodule.print_terminal(
workers_state_path, keep_to_next_cycle)
except KeyboardInterrupt:
print(ESC+'[2J', end='', flush=True)
print(ESC+'[0;0H', end='', flush=True)
raise
except BaseException:
print(ESC+'[2J', end='', flush=True)
print(ESC+'[0;0H', end='', flush=True)
traceback.print_exc()
sio = StringIO()
traceback.print_exc(file=sio)
Path('display_fetch_futures.trace').write_text(sio.getvalue())
from_exc = True
sleep(1)
selfmodule.do_cycle_sleep()
print(ESC+'[0;0H', end='', flush=True)
print(ESC+'[K', end='', flush=True)
print('Queue is empty')


if __name__ == "__main__":
main()