reddit-image-wall-getter/reddit_imgs/display_fetch_futures.py

646 lines
22 KiB
Python
Raw Normal View History

2020-06-05 22:19:45 +00:00
#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
"""Live terminal dashboard that monitors reddit image fetch worker state."""
import datetime
import importlib
import shutil
import time
import traceback
from io import StringIO
from pathlib import Path
from time import sleep
from typing import Dict, List, Union

import colored

from .system.format_file_size import Numeric, format_power2, format_power10

# NOTE(review): this local alias immediately shadows the `Numeric` imported
# above; kept as-is so behavior is unchanged for any downstream user of it.
Numeric = Union[int, float]

ESC = '\033'  # ANSI escape character used to build terminal control sequences
LINK_MEMORY_SIZE = 64  # how many recent update timestamps to keep for ETA math
JOB_BYTES_MEMORY_SIZE = 4
JOB_LINKS_MEMORY_SIZE = 8  # per-job (timestamp, bytes) samples kept for speed stats
def reverse_mapping_list(d: dict) -> dict:
    """Invert *d*, grouping all keys that share a value into a list per value."""
    inverted: dict = {}
    for key, value in d.items():
        inverted.setdefault(value, []).append(key)
    return inverted
def reverse_mapping(d: dict) -> dict:
    """Invert *d* one-to-one; if values repeat, the last key seen wins."""
    swapped: dict = {}
    for key, value in d.items():
        swapped[value] = key
    return swapped
def frequency_dict(l: list) -> dict:
    """Count how many times each distinct element occurs in *l*."""
    counts: dict = {}
    for element in l:
        counts[element] = counts.get(element, 0) + 1
    return counts
def print_terminal(workers_state_path: Path, keep_to_next_cycle=None):
    """Render one frame of the worker-progress dashboard to the terminal.

    Reads per-job state files from *workers_state_path* (files whose name
    contains '=' are metadata, e.g. ``<job>=line``) and prints a full-screen
    ANSI status report: per-state worker/link counts, a download-speed and
    ETA estimate, a recency gradient, a per-job progress bar, and a grid of
    jobs grouped by state.

    *keep_to_next_cycle* is an opaque dict carried between refresh cycles
    (timestamp and byte histories used for speed/ETA estimation); it is
    created on first use and returned so the caller can pass it back in.

    Returns the (possibly updated) *keep_to_next_cycle* dict, or the one it
    was given unchanged if the state files could not be parsed this cycle.
    """
    jobs = list(map(lambda a: a.name, filter(
        lambda a: '=' not in a.name,
        workers_state_path.iterdir())))
    if len(jobs) == 0:
        # Nothing to monitor yet: clear screen and show a waiting banner.
        print(ESC+'[2J', end='', flush=True)
        print(ESC+'[0;0H', end='', flush=True)
        print(
            f'Waiting for jobs @ {datetime.datetime.now()}', end='', flush=True)
        print(ESC+'[K', flush=True)
        return
    # Guess a stable display order from the numeric suffix after ':' (if any).
    jobs_sequenced_by_guess = sorted(
        jobs,
        key=lambda a: (
            0 if ':' not in a else int(a.split(':', 1)[1]),
            a,
        )
    )
    jobs_state_description_content = dict()
    jobs_lines = dict()
    jobs_queues = dict()
    jobs_enqueueds = dict()
    jobs_states = dict()
    jobs_dates = dict()
    jobs_latest_link = dict()
    start_date = datetime.datetime.now()
    try:
        start_date = datetime.datetime.fromtimestamp(
            workers_state_path.stat().st_mtime)
        # Explicit display line assignments, from the '<job>=line' files.
        jobs_lines = {
            job: int(
                workers_state_path.joinpath(job+'=line')
                .read_text()
            )
            for job in jobs
            if workers_state_path.joinpath(job+'=line').exists()
        }
        # Assign remaining jobs the next free line numbers, in guessed order.
        while len(jobs_sequenced_by_guess) > 0:
            job, *jobs_sequenced_by_guess = jobs_sequenced_by_guess
            if job not in jobs_lines:
                jobs_lines[job] = max([-1] + list(jobs_lines.values())) + 1
        del job
        del jobs_sequenced_by_guess
        # Each state file holds colon-separated fields:
        # state:queue:enqueued:bytes:files[:latest_link]
        jobs_state_description_content = {
            job: workers_state_path.joinpath(job).read_text()
            for job in jobs
        }
        jobs_queues = {
            job: int(
                jobs_state_description_content[job]
                .split(':')[1]
            )
            for job in jobs
        }
        jobs_enqueueds = {
            job: int(
                jobs_state_description_content[job]
                .split(':')[2]
            )
            for job in jobs
        }
        jobs_states = {
            job: (
                jobs_state_description_content[job]
                .split(':')[0]
            )
            for job in jobs
        }
        jobs_dates = {
            job: (
                workers_state_path.joinpath(job)
                .stat().st_mtime_ns
            )
            for job in jobs
        }
        jobs_files = {
            job: int(
                jobs_state_description_content[job]
                .split(':')[4]
            )
            for job in jobs
        }
        jobs_bytes = {
            job: int(
                jobs_state_description_content[job]
                .split(':')[3]
            )
            for job in jobs
        }
        # Optional sixth field; currently parsed but not displayed.
        jobs_latest_link = {
            job: (None
                  if len(parts := jobs_state_description_content[job].split(':', 6)) < 6
                  else parts[5])
            for job in jobs
        }
    except KeyboardInterrupt:
        raise
    except Exception:
        # State files are rewritten concurrently by the workers; a torn read
        # just skips this frame and tries again on the next cycle.
        return keep_to_next_cycle
    print(ESC+'[0;0H', end='', flush=True)
    if keep_to_next_cycle is None:
        keep_to_next_cycle = dict()
    displayatbottom = ''
    # Width of each job's cell: "name@enqueued/queue" (or "name@queue").
    jobs_sizes = {job: len(
        job +
        '@' +
        (str(jobs_enqueueds[job])+'/' if jobs_states[job] in ('running', 'scrubbing') else '') +
        str(jobs_queues[job])
    )
        for job in jobs_lines.keys()}
    state_sequence = [
        ('finished',),
        ('running', 'scrubbing'),
        ('enqueued',),
        ('waiting',),
        ('failed',),
    ]
    # Unicode eighth-blocks used to draw the per-job progress bar.
    block_sequence = ' \u2581\u2582\u2583\u2584\u2585\u2586\u2587\u2588'
    state_stats = frequency_dict(list(jobs_states.values()))
    # Link counts per state; running jobs split into pending(1)/done(2).
    links_stats = dict(waiting=0, enqueued=0, scrubbing=0,
                       running1=0, running2=0, finished=0,
                       failed=0)
    for job in jobs:
        state = jobs_states[job]
        jq1 = jobs_queues.get(job, 0)
        jq2 = jobs_enqueueds.get(job, 0)
        if state not in ('running', 'scrubbing'):
            if state not in links_stats:
                links_stats[state] = 0
            links_stats[state] += jq1
        elif state == 'scrubbing':
            links_stats['running1'] += jq2
            links_stats['running2'] += jq1-jq2
        else:
            links_stats['running1'] += jq2
            links_stats['running2'] += jq1-jq2
    term_sz = shutil.get_terminal_size()
    jobs_sorted = list(map(lambda a: a[1], sorted(
        reverse_mapping(jobs_lines).items())))
    # Rank jobs by recency of their last state-file update (0 = most recent).
    jobsdates_list = list(reversed(sorted(jobs_dates.values())))
    jobs_daterank = {
        job: jobsdates_list.index(date)
        for job, date in jobs_dates.items()}
    # Grey background gradient: brightest for the most recently updated jobs.
    bg_rank_color_names = [
        "grey_62",  # 16
        "grey_58",  # 15
        "grey_54",  # 14
        "grey_50",  # 13
        "grey_46",  # 12
        "grey_42",  # 11
        "grey_39",  # 10
        "grey_35",  # 9
        "grey_30",  # 8
        "grey_27",  # 7
        "grey_23",  # 6
        "grey_19",  # 5
        "grey_15",  # 4
        "grey_11",  # 3
        "grey_7",   # 2
        "grey_3",   # 1
    ]
    bg_rank = [
        colored.bg(clr)
        for clr in bg_rank_color_names
    ]
    # Show as many gradient steps as there are active (running/scrubbing) jobs.
    bg_rank_size = max(
        1,
        state_stats.get('running', 0) + state_stats.get('scrubbing', 0)
    )
    bg_rank = bg_rank[-bg_rank_size:]
    bg_rang_programmed_len = bg_rank_size
    # Pad with black so every daterank indexes a valid background color.
    bg_rank += [colored.bg('black')] * (len(jobs_dates) - len(bg_rank))
    # Accumulate distinct update timestamps across cycles to estimate the
    # average seconds-per-link and its variance.
    link_processing_timestamps = keep_to_next_cycle.get(
        'link_processing_timestamps', list())
    for link_processing_timestamp in jobs_dates.values():
        if link_processing_timestamp not in link_processing_timestamps:
            link_processing_timestamps.append(link_processing_timestamp)
    link_processing_timestamps = list(reversed(sorted(link_processing_timestamps)))[
        :max(state_stats.get("running", 0), LINK_MEMORY_SIZE)]
    keep_to_next_cycle['link_processing_timestamps'] = link_processing_timestamps
    # Deltas (seconds) between consecutive timestamps, newest anchored to now.
    link_processing_deltas = list(map(
        lambda t: (t[0]-t[1])/10**9,
        zip(
            [time.time()*10**9]+link_processing_timestamps,
            link_processing_timestamps+[link_processing_timestamps[-1]]
        )))[0:-1]
    link_processing_deltas_avg = sum(
        link_processing_deltas+[0])/max(1, len(link_processing_deltas))
    link_processing_deltas_var = 0
    if (l := len(link_processing_deltas)) > 0:
        diff = list(map(
            lambda lpd: (lpd - link_processing_deltas_avg),
            link_processing_deltas
        ))
        diffsqd = list(map(
            lambda d: d**2,
            diff
        ))
        link_processing_deltas_var = sum(diffsqd)/l
    job_links_avg = link_processing_deltas_avg
    job_links_var = link_processing_deltas_var
    download_pending_count = (
        links_stats.get("waiting", 0) +
        links_stats.get("enqueued", 0) +
        links_stats.get("running1", 0)
    )
    links_per_sec = 1/max(2**-30, job_links_avg)
    seconds_to_finish = download_pending_count*job_links_avg
    datetime_when_finished = datetime.datetime.now(
    ) + datetime.timedelta(seconds=seconds_to_finish)
    time_to_finish = '%2dd %2dh %2dm %2ds' % (
        seconds_to_finish//(3600*24),
        (seconds_to_finish % (3600*24))//3600,
        (seconds_to_finish % 3600)//60,
        seconds_to_finish % 60,
    )
    displayatbottom += f'Speed: {"%6.3f" % links_per_sec} links/s | '
    # Placeholder replaced below, once the byte-speed average is computed.
    displayatbottom += f'Speed: ##BYTESPERSEC## | '
    displayatbottom += f'ETA: {time_to_finish} | '
    displayatbottom += f'ETL: {datetime_when_finished} | '
    displayatbottom += f'Error: \u00b1{"%6.2f" % (100*(job_links_var**.5)/job_links_avg)}%'
    # Foreground styles for the primary (state) numbers...
    number1colors: Dict[str, List[str]] = dict(
        waiting=[
            colored.fg('light_gray'),
            colored.attr('dim'),
        ],
        enqueued=[
            colored.fg('light_red'),
        ],
        scrubbing=[
            colored.fg('light_cyan')
        ],
        running=[
            colored.fg('light_yellow')
        ],
        finished=[
            colored.fg('light_green')
        ],
    )
    number1colors['failed'] = number1colors['waiting']
    # ...and for the secondary (progress) numbers.
    number2colors: Dict[str, List[str]] = number1colors.copy()
    number2colors['running'] = [colored.fg('light_cyan')]
    number2colors['scrubbing'] = [colored.fg('light_magenta')]
    number2colors['running1'] = number1colors['running']
    number2colors['running2'] = number2colors['running']
    workers_text_stats = (
        colored.stylize(
            f'{state_stats.get("waiting", 0)} waiting',
            number1colors['waiting'],
        ) +
        ' - ' +
        colored.stylize(
            f'{state_stats.get("enqueued", 0)} enqueued',
            number1colors['enqueued'],
        ) +
        ' - ' +
        colored.stylize(
            f'{state_stats.get("running", 0)} running',
            number1colors['running'],
        ) +
        ' \u00b7 ' +
        colored.stylize(
            f'{state_stats.get("scrubbing", 0)} scrubbing',
            number1colors['scrubbing'],
        ) +
        ' - ' +
        colored.stylize(
            f'{state_stats.get("finished", 0)} finished',
            number1colors['finished'],
        )
    )
    links_text_stats = (
        colored.stylize(
            f'{links_stats.get("waiting", 0)} w.',
            number2colors['waiting'],
        ) +
        ' - ' +
        colored.stylize(
            f'{links_stats.get("enqueued", 0)} e.',
            number2colors['enqueued'],
        ) +
        ' - ' +
        colored.stylize(
            f'{links_stats.get("running1", 0)} staging',
            number2colors['running1'],
        ) +
        ' \u00b7 ' +
        colored.stylize(
            f'{links_stats.get("running2", 0)} downloaded',
            number2colors['running2'],
        ) +
        ' - ' +
        colored.stylize(
            f'{links_stats.get("finished", 0)} f.',
            number2colors['finished'],
        )
    )
    # Center both stats lines to a common width.
    text_stats_size = max(map(len, [workers_text_stats, links_text_stats]))
    text_stats_fmt = '{0:^%d}' % text_stats_size
    workers_text_stats = text_stats_fmt.format(workers_text_stats)
    links_text_stats = text_stats_fmt.format(links_text_stats)
    dwldcount = links_stats.get("running2", 0) + links_stats.get("finished", 0)
    filecount = sum(jobs_files.values())
    bytecount = sum(jobs_bytes.values())
    worker_ratio = state_stats.get(
        "finished", 0)/(max(1, sum(state_stats.values())))
    links_ratio = (links_stats.get("running2", 0) +
                   links_stats.get("finished", 0))/max(1, sum(links_stats.values()))
    # Keep a short (timestamp, bytes) history per active job to derive speed.
    jobs_bytes_history = keep_to_next_cycle.get(
        'jobs_bytes_history', dict())
    keep_to_next_cycle['jobs_bytes_history'] = jobs_bytes_history
    for job, state in jobs_states.items():
        if state in ('running', 'scrubbing'):
            if (db := (jobs_dates[job], jobs_bytes[job])) not in (jbh := jobs_bytes_history.get(job, list())):
                jbh.append(db)
            jobs_bytes_history[job] = jbh[-JOB_LINKS_MEMORY_SIZE:]
        else:
            if job in jobs_bytes_history:
                del jobs_bytes_history[job]
    jobs_speeds_avg = list()
    for job, bytes_history in jobs_bytes_history.items():
        # Pairwise (delta_seconds, delta_bytes) between consecutive samples.
        job_byte_transitions = list(map(
            lambda b: tuple(map(
                lambda a: a[1] - a[0],
                zip(*tuple(map(
                    lambda a: (a[0]/10**9, a[1]),
                    b
                ))))),
            zip(
                [bytes_history[0]] + bytes_history,
                bytes_history + [bytes_history[-1]],
            )))[1:-1]
        job_speed = list(map(
            lambda a: a[1]/a[0],
            filter(lambda a: a[0] != 0 and a[1] != 0, job_byte_transitions)
        ))
        if len(job_speed) > 0:
            job_speed_avg = sum(job_speed)/len(job_speed)
            jobs_speeds_avg.append(job_speed_avg)
    jobs_speed_avg = sum(jobs_speeds_avg)
    displayatbottom = displayatbottom.replace(
        "##BYTESPERSEC##",
        "%10s/s \u00b7 %10sps" % (
            str(format_power2(jobs_speed_avg)),
            str(format_power10(jobs_speed_avg*8, suffix='b')),
        ),
    )
    # --- Rendering -------------------------------------------------------
    print(
        f'# Monitoring {len(jobs)} jobs ' +
        f'@ {datetime.datetime.now()} ' +
        f'@ {datetime.datetime.now() - start_date}' +
        '',
        end='', flush=True)
    print(ESC+'[K', flush=True)
    print('Workers: ' +
          '%8.4f%% | ' % (100*worker_ratio) +
          workers_text_stats +
          ' | ' +
          f'Current: ' +
          f'{dwldcount} links, ' +
          f'{filecount} files, ' +
          f'{format_power10(bytecount)}' +
          '',
          end='')
    print(ESC+'[K', flush=True)
    print('Links: ' +
          '%8.4f%% | ' % (100*links_ratio) +
          links_text_stats +
          ' | ' +
          f'Expected: ' +
          '%.3f fl/lnk, ' % (filecount/max(1, dwldcount)) +
          f'{int(filecount/max(2**-30, links_ratio))} files, ' +
          f'{format_power10(int(bytecount/max(2**-30, links_ratio)))}' +
          '',
          end='')
    print(ESC+'[K', flush=True)
    # Legend: numbered swatches of the recency gradient.
    print('Latest updates gradient: ', end='')
    bg_rang_programmed_len_digits = len('%d' % (bg_rang_programmed_len+1))
    for i in range(bg_rang_programmed_len+1):
        if i == bg_rang_programmed_len:
            print(' ', end='')
            print('-'*bg_rang_programmed_len_digits, end='')
        else:
            print(bg_rank[i], end='')
            print(' ', end='')
            print(('%%0%dd' % bg_rang_programmed_len_digits) % (i+1), end='')
            print(' ', end='')
    print(colored.attr('reset'), end='')
    print(ESC+'[K', flush=True)
    # One block character per job: fill height reflects completion ratio.
    print('Overall progress: ', end='')
    for current_job in jobs_sorted:
        current_state = jobs_states[current_job]
        number1color = number1colors.get(current_state, list())
        number2color = number2colors.get(current_state, list())
        current_block_no = ((len(block_sequence)-1)*(
            jobs_queues[current_job]-jobs_enqueueds[current_job]))//max(1, jobs_queues[current_job])
        print(
            colored.stylize(
                block_sequence[current_block_no],
                # Turn the foreground color codes into background ones.
                [color.replace("38;5;", "48;5;") for color in number1color] +
                number2color
            ),
            end=''
        )
    print(ESC+'[K', flush=True)
    # Grid of jobs, one section per state group, in fixed-width columns.
    max_job_size = max([*jobs_sizes.values(), 0])
    per_column = term_sz.columns//(max_job_size+1)
    for stateelems in state_sequence:
        current_jobs = [job
                        for job in jobs_sorted
                        if jobs_states[job] in stateelems]
        if len(current_jobs) <= 0:
            continue
        print('» ', end='')
        print(' & '.join([
            colored.stylize(stateelem, number1colors[stateelem])
            for stateelem in stateelems
        ]),
            end=''
        )
        print(' «', end='')
        print(ESC+'[K', flush=True)
        max_job_size = max([0]+[jobs_sizes[job] for job in current_jobs])
        per_column = term_sz.columns//(max_job_size+1)
        # Least-complete jobs first, ties broken by display line.
        current_jobs = sorted(
            current_jobs,
            key=lambda j: (
                jobs_enqueueds[j]/max(0.00000001, jobs_queues[j]),
                jobs_lines[j],)
        )
        while len(current_jobs) > 0:
            for _ in range(per_column):
                if len(current_jobs) > 0:
                    current_job, *current_jobs = current_jobs
                    current_state = jobs_states[current_job]
                    number1color = number1colors.get(current_state, list())
                    number2color = number2colors.get(current_state, list())
                    print(''.join(number1color), end='')
                    print(bg_rank[jobs_daterank[current_job]], end='')
                    print(' '*(max_job_size-jobs_sizes[current_job]), end='')
                    print(current_job, end='')
                    print('@', end='')
                    print(str(jobs_queues[current_job]), end='')
                    if current_state in ('running', 'scrubbing'):
                        print('/', end='')
                        print(''.join(number2color), end='')
                        print(str(jobs_enqueueds[current_job]), end='')
                    print(colored.attr('reset'), end='')
                    print(' ', end='')
            print(ESC+'[K', flush=False)
    print(displayatbottom, end=ESC+'[K', flush=True)
    print(ESC+'[0J', end='', flush=True)
    print(ESC+'[0;0H', end='', flush=True)
    return keep_to_next_cycle
def do_cycle_sleep():
    """Pause for one sixtieth of a second between dashboard refresh cycles."""
    frame_interval = 1 / 60
    sleep(frame_interval)
def main():
    """Run the dashboard loop until the worker-state directory disappears.

    Resolves this module's dotted name from its file path, imports it, and
    hot-reloads it every cycle so live code edits take effect immediately.
    On errors the traceback is shown and also dumped to
    ``display_fetch_futures.trace``.
    """
    selfmodule_path = (Path(__file__)
                       .absolute()
                       .relative_to(Path('.').absolute()
                                    ))
    selfmodule_name = (
        str(selfmodule_path.parent).replace('/', '.') +
        '.' +
        selfmodule_path.stem)
    selfmodule_name = (
        selfmodule_name[1:]
        if selfmodule_name.startswith('.') else
        selfmodule_name)
    selfmodule = importlib.import_module(selfmodule_name)
    workers_state_path = Path('i_gdl_w')
    from_exc = False
    keep_to_next_cycle = None
    print(ESC+'[2J', end='', flush=True)
    print(ESC+'[0;0H', end='', flush=True)
    while workers_state_path.exists():
        try:
            # Hot-reload so edits to this file apply without restarting.
            selfmodule = importlib.reload(selfmodule)
            if from_exc:
                # Coming back from an error frame: wipe the traceback dump.
                from_exc = False
                print(ESC+'[2J', end='', flush=True)
                print(ESC+'[0;0H', end='', flush=True)
            keep_to_next_cycle = selfmodule.print_terminal(
                workers_state_path, keep_to_next_cycle)
        except KeyboardInterrupt:
            print(ESC+'[2J', end='', flush=True)
            print(ESC+'[0;0H', end='', flush=True)
            raise
        except BaseException:
            print(ESC+'[2J', end='', flush=True)
            print(ESC+'[0;0H', end='', flush=True)
            traceback.print_exc()
            sio = StringIO()
            traceback.print_exc(file=sio)
            Path('display_fetch_futures.trace').write_text(sio.getvalue())
            from_exc = True
            sleep(1)
        selfmodule.do_cycle_sleep()
    print(ESC+'[0;0H', end='', flush=True)
    print(ESC+'[K', end='', flush=True)
    print('Queue is empty')


if __name__ == "__main__":
    main()