#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
"""Live terminal dashboard for download-worker job state.

Scans a state directory (one file per job, colon-separated fields, plus
``<job>=line`` position hints), aggregates per-state worker/link counts,
and repaints the terminal in place using ANSI escape sequences and the
``colored`` package.  ``main()`` re-imports this module every cycle so the
display code can be edited while it runs.
"""

import datetime
import importlib
import shutil
import time
import traceback
from io import StringIO
from pathlib import Path
from time import sleep
from typing import Dict, List, Union

import colored

from .system.format_file_size import Numeric, format_power2, format_power10

# NOTE(review): this local alias immediately shadows the ``Numeric``
# imported above; neither is referenced in this file.  Kept as-is to avoid
# changing the module's public attributes.
Numeric = Union[int, float]

ESC = '\033'

# How many recent link-completion timestamps to remember between cycles.
LINK_MEMORY_SIZE = 64
JOB_BYTES_MEMORY_SIZE = 4
# How many (mtime, bytes) samples to keep per running job for speed estimates.
JOB_LINKS_MEMORY_SIZE = 8


def reverse_mapping_list(d: dict) -> dict:
    """Invert ``d``; values become keys mapping to the LIST of original keys."""
    r = {k: list() for k in set(d.values())}
    for k, v in d.items():
        r[v].append(k)
    return r


def reverse_mapping(d: dict) -> dict:
    """Invert ``d`` one-to-one (later duplicate values win)."""
    return {v: k for k, v in d.items()}


def frequency_dict(l: list) -> dict:
    """Return ``{element: occurrence_count}`` for ``l``."""
    f = {e: 0 for e in set(l)}
    for e in l:
        f[e] += 1
    return f


def print_terminal(workers_state_path: Path, keep_to_next_cycle=None):
    """Render one dashboard frame from the files under *workers_state_path*.

    ``keep_to_next_cycle`` is an opaque dict of state carried between
    frames (timestamp/byte histories for speed estimation); the updated
    dict is returned so the caller can pass it back next cycle.
    State files are expected to contain ``state:queue:enqueued:bytes:files``
    (optionally ``:latest_link``) — TODO confirm against the writer side.
    """
    # Job files have plain names; ``=``-suffixed files are metadata (e.g.
    # "<job>=line" stores the preferred display line).
    jobs = list(map(lambda a: a.name, filter(
        lambda a: '=' not in a.name,
        workers_state_path.iterdir())))
    if len(jobs) == 0:
        print(ESC+'[2J', end='', flush=True)
        print(ESC+'[0;0H', end='', flush=True)
        print(
            f'Waiting for jobs @ {datetime.datetime.now()}', end='', flush=True)
        print(ESC+'[K', flush=True)
        return
    # Fallback ordering for jobs without an explicit "=line" hint: sort by
    # the numeric suffix after the first ':' (if any), then by name.
    jobs_sequenced_by_guess = sorted(
        jobs,
        key=lambda a: (
            0 if ':' not in a else int(a.split(':', 1)[1]),
            a,
        )
    )
    jobs_state_description_content = dict()
    jobs_lines = dict()
    jobs_queues = dict()
    jobs_enqueueds = dict()
    jobs_states = dict()
    jobs_dates = dict()
    jobs_latest_link = dict()
    start_date = datetime.datetime.now()
    try:
        start_date = datetime.datetime.fromtimestamp(
            workers_state_path.stat().st_mtime)
        jobs_lines = {
            job: int(
                workers_state_path.joinpath(job+'=line')
                .read_text()
            )
            for job in jobs
            if workers_state_path.joinpath(job+'=line').exists()
        }
        # Assign fresh display lines (after the largest known one) to any
        # job that has no "=line" hint, following the guessed order.
        while len(jobs_sequenced_by_guess) > 0:
            job, *jobs_sequenced_by_guess = jobs_sequenced_by_guess
            if job not in jobs_lines:
                jobs_lines[job] = max([-1] + list(jobs_lines.values())) + 1
        del job
        del jobs_sequenced_by_guess
        jobs_state_description_content = {
            job: workers_state_path.joinpath(job).read_text()
            for job in jobs
        }
        # Colon-separated state file fields:
        #   [0] state  [1] queue total  [2] enqueued  [3] bytes  [4] files
        #   [5] latest link (optional)
        jobs_queues = {
            job: int(jobs_state_description_content[job].split(':')[1])
            for job in jobs
        }
        jobs_enqueueds = {
            job: int(jobs_state_description_content[job].split(':')[2])
            for job in jobs
        }
        jobs_states = {
            job: jobs_state_description_content[job].split(':')[0]
            for job in jobs
        }
        jobs_dates = {
            job: workers_state_path.joinpath(job).stat().st_mtime_ns
            for job in jobs
        }
        jobs_files = {
            job: int(jobs_state_description_content[job].split(':')[4])
            for job in jobs
        }
        jobs_bytes = {
            job: int(jobs_state_description_content[job].split(':')[3])
            for job in jobs
        }
        # Parsed but not currently displayed anywhere below.
        jobs_latest_link = {
            job: (None
                  if len(parts := jobs_state_description_content[job]
                         .split(':', 6)) < 6
                  else parts[5])
            for job in jobs
        }
    except KeyboardInterrupt:
        raise
    except Exception:
        # State files are rewritten concurrently by the workers; a torn
        # read/parse just skips this frame (was a bare ``except:``, which
        # also swallowed SystemExit).
        return keep_to_next_cycle
    print(ESC+'[0;0H', end='', flush=True)
    if keep_to_next_cycle is None:
        keep_to_next_cycle = dict()
    displayatbottom = ''
    # Width of each job cell: "name@queue" or "name@enqueued/queue".
    jobs_sizes = {
        job: len(
            job + '@'
            + (str(jobs_enqueueds[job])+'/'
               if jobs_states[job] in ('running', 'scrubbing') else '')
            + str(jobs_queues[job])
        )
        for job in jobs_lines.keys()
    }
    # Display order of the per-state sections.
    state_sequence = [
        ('finished',),
        ('running', 'scrubbing'),
        ('enqueued',),
        ('waiting',),
        ('failed',),
    ]
    # Space + U+2581..U+2588 (eighth blocks) for the progress bar glyphs.
    block_sequence = ' \u2581\u2582\u2583\u2584\u2585\u2586\u2587\u2588'
    state_stats = frequency_dict(list(jobs_states.values()))
    links_stats = dict(waiting=0, enqueued=0, scrubbing=0,
                       running1=0, running2=0, finished=0, failed=0)
    for job in jobs:
        state = jobs_states[job]
        jq1 = jobs_queues.get(job, 0)
        jq2 = jobs_enqueueds.get(job, 0)
        if state not in ('running', 'scrubbing'):
            if state not in links_stats:
                links_stats[state] = 0
            links_stats[state] += jq1
        else:
            # running/scrubbing had two identical branches here; merged.
            # running1 = links still staged, running2 = links already done.
            links_stats['running1'] += jq2
            links_stats['running2'] += jq1-jq2
    term_sz = shutil.get_terminal_size()
    jobs_sorted = list(map(lambda a: a[1], sorted(
        reverse_mapping(jobs_lines).items())))
    jobsdates_list = list(reversed(sorted(jobs_dates.values())))
    # Rank 0 = most recently updated job (drives the recency gradient).
    jobs_daterank = {
        job: jobsdates_list.index(date)
        for job, date in jobs_dates.items()}
    bg_rank_color_names = [
        # "grey_93",  # 24
        # "grey_89",  # 23
        # "grey_85",  # 22
        # "grey_82",  # 21
        # "grey_78",  # 20
        # "grey_74",  # 19
        # "grey_70",  # 18
        # "grey_66",  # 17
        "grey_62",  # 16
        "grey_58",  # 15
        "grey_54",  # 14
        "grey_50",  # 13
        "grey_46",  # 12
        "grey_42",  # 11
        "grey_39",  # 10
        "grey_35",  # 9
        "grey_30",  # 8
        "grey_27",  # 7
        "grey_23",  # 6
        "grey_19",  # 5
        "grey_15",  # 4
        "grey_11",  # 3
        "grey_7",   # 2
        "grey_3",   # 1
    ]
    bg_rank = [
        colored.bg(clr)
        for clr in bg_rank_color_names
    ]
    # Gradient depth: one shade per active (running/scrubbing) job.
    bg_rank_size = max(
        1,
        state_stats.get('running', 0) + state_stats.get('scrubbing', 0)
    )
    bg_rank = bg_rank[-bg_rank_size:]
    bg_rang_programmed_len = bg_rank_size
    # Everything older than the gradient gets a plain black background.
    bg_rank += [colored.bg('black')] * (len(jobs_dates) - len(bg_rank))
    # (A commented-out per-job timestamp-history ETA prototype lived here;
    # removed — superseded by the link_processing_timestamps logic below.)
    link_processing_timestamps = keep_to_next_cycle.get(
        'link_processing_timestamps', list())
    for link_processing_timestamp in jobs_dates.values():
        if link_processing_timestamp not in link_processing_timestamps:
            link_processing_timestamps.append(link_processing_timestamp)
    link_processing_timestamps = list(reversed(sorted(link_processing_timestamps)))[
        :max(state_stats.get("running", 0), LINK_MEMORY_SIZE)]
    keep_to_next_cycle['link_processing_timestamps'] = link_processing_timestamps
    # Deltas (seconds) between consecutive remembered update timestamps,
    # with "now" prepended so the newest gap is included.
    link_processing_deltas = list(map(
        lambda t: (t[0]-t[1])/10**9,
        zip(
            [time.time()*10**9]+link_processing_timestamps,
            link_processing_timestamps+[link_processing_timestamps[-1]]
        )))[0:-1]
    link_processing_deltas_avg = sum(
        link_processing_deltas+[0])/max(1, len(link_processing_deltas))
    link_processing_deltas_var = 0
    if (l := len(link_processing_deltas)) > 0:
        diff = list(map(
            lambda lpd: (lpd - link_processing_deltas_avg),
            link_processing_deltas
        ))
        diffsqd = list(map(
            lambda d: d**2,
            diff
        ))
        link_processing_deltas_var = sum(diffsqd)/l
    job_links_avg = link_processing_deltas_avg
    job_links_var = link_processing_deltas_var
    download_pending_count = (
        links_stats.get("waiting", 0)
        + links_stats.get("enqueued", 0)
        + links_stats.get("running1", 0)
    )
    links_per_sec = 1/max(2**-30, job_links_avg)
    seconds_to_finish = download_pending_count*job_links_avg
    datetime_when_finished = datetime.datetime.now(
    ) + datetime.timedelta(seconds=seconds_to_finish)
    time_to_finish = '%2dd %2dh %2dm %2ds' % (
        seconds_to_finish//(3600*24),
        (seconds_to_finish % (3600*24))//3600,
        (seconds_to_finish % 3600)//60,
        seconds_to_finish % 60,
    )
    displayatbottom += f'Speed: {"%6.3f" % links_per_sec} links/s | '
    displayatbottom += f'Speed: ##BYTESPERSEC## | '
    displayatbottom += f'ETA: {time_to_finish} | '
    displayatbottom += f'ETL: {datetime_when_finished} | '
    # Guard the divisor: job_links_avg can be exactly 0 when no deltas are
    # known yet (previously a potential ZeroDivisionError).
    displayatbottom += f'Error: \u00b1{"%6.2f" % (100*(job_links_var**.5)/max(2**-30, job_links_avg))}%'
    # Foreground palettes: number1colors for worker states, number2colors
    # for link counters (running/scrubbing get distinct link colors).
    number1colors: Dict[str, List[str]] = dict(
        waiting=[
            colored.fg('light_gray'),
            colored.attr('dim'),
        ],
        enqueued=[
            colored.fg('light_red'),
        ],
        scrubbing=[
            colored.fg('light_cyan')
        ],
        running=[
            colored.fg('light_yellow')
        ],
        finished=[
            colored.fg('light_green')
        ],
    )
    number1colors['failed'] = number1colors['waiting']
    number2colors: Dict[str, List[str]] = number1colors.copy()
    number2colors['running'] = [colored.fg('light_cyan')]
    number2colors['scrubbing'] = [colored.fg('light_magenta')]
    number2colors['running1'] = number1colors['running']
    number2colors['running2'] = number2colors['running']
    workers_text_stats = (
        colored.stylize(
            f'{state_stats.get("waiting", 0)} waiting',
            number1colors['waiting'],
        )
        + ' - ' +
        colored.stylize(
            f'{state_stats.get("enqueued", 0)} enqueued',
            number1colors['enqueued'],
        )
        + ' - ' +
        colored.stylize(
            f'{state_stats.get("running", 0)} running',
            number1colors['running'],
        )
        + ' \u00b7 ' +
        colored.stylize(
            f'{state_stats.get("scrubbing", 0)} scrubbing',
            number1colors['scrubbing'],
        )
        + ' - ' +
        colored.stylize(
            f'{state_stats.get("finished", 0)} finished',
            number1colors['finished'],
        )
    )
    links_text_stats = (
        colored.stylize(
            f'{links_stats.get("waiting", 0)} w.',
            number2colors['waiting'],
        )
        + ' - ' +
        colored.stylize(
            f'{links_stats.get("enqueued", 0)} e.',
            number2colors['enqueued'],
        )
        + ' - ' +
        colored.stylize(
            f'{links_stats.get("running1", 0)} staging',
            number2colors['running1'],
        )
        + ' \u00b7 ' +
        colored.stylize(
            f'{links_stats.get("running2", 0)} downloaded',
            number2colors['running2'],
        )
        + ' - ' +
        colored.stylize(
            f'{links_stats.get("finished", 0)} f.',
            number2colors['finished'],
        )
    )
    # Center both stat lines in a common width so the columns line up.
    text_stats_size = max(map(len, [workers_text_stats, links_text_stats]))
    text_stats_fmt = '{0:^%d}' % text_stats_size
    workers_text_stats = text_stats_fmt.format(workers_text_stats)
    links_text_stats = text_stats_fmt.format(links_text_stats)
    dwldcount = links_stats.get("running2", 0) + links_stats.get("finished", 0)
    filecount = sum(jobs_files.values())
    bytecount = sum(jobs_bytes.values())
    worker_ratio = state_stats.get(
        "finished", 0)/(max(1, sum(state_stats.values())))
    links_ratio = (links_stats.get("running2", 0)
                   + links_stats.get("finished", 0))/max(1, sum(links_stats.values()))
    # Rolling (mtime_ns, bytes) samples per active job, kept across frames
    # to estimate per-job download speed.
    jobs_bytes_history = keep_to_next_cycle.get(
        'jobs_bytes_history', dict())
    keep_to_next_cycle['jobs_bytes_history'] = jobs_bytes_history
    for job, state in jobs_states.items():
        if state in ('running', 'scrubbing'):
            if (db := (jobs_dates[job], jobs_bytes[job])) not in (jbh := jobs_bytes_history.get(job, list())):
                jbh.append(db)
                jobs_bytes_history[job] = jbh[-JOB_LINKS_MEMORY_SIZE:]
        else:
            if job in jobs_bytes_history:
                del jobs_bytes_history[job]
    jobs_speeds_avg = list()
    for job, bytes_history in jobs_bytes_history.items():
        # Pairwise (Δseconds, Δbytes) between consecutive samples.
        job_byte_transitions = list(map(
            lambda b: tuple(map(
                lambda a: a[1] - a[0],
                zip(*tuple(map(
                    lambda a: (a[0]/10**9, a[1]),
                    b
                ))))),
            zip(
                [bytes_history[0]] + bytes_history,
                bytes_history + [bytes_history[-1]],
            )))[1:-1]
        job_speed = list(map(
            lambda a: a[1]/a[0],
            filter(lambda a: a[0] != 0 and a[1] != 0, job_byte_transitions)
        ))
        if len(job_speed) > 0:
            job_speed_avg = sum(job_speed)/len(job_speed)
            jobs_speeds_avg.append(job_speed_avg)
    jobs_speed_avg = sum(jobs_speeds_avg)
    displayatbottom = displayatbottom.replace(
        "##BYTESPERSEC##",
        "%10s/s \u00b7 %10sps" % (
            str(format_power2(jobs_speed_avg)),
            str(format_power10(jobs_speed_avg*8, suffix='b')),
        ),
    )
    print(
        f'# Monitoring {len(jobs)} jobs '
        + f'@ {datetime.datetime.now()} '
        + f'@ {datetime.datetime.now() - start_date}'
        + '', end='', flush=True)
    print(ESC+'[K', flush=True)
    print('Workers: '
          + '%8.4f%% | ' % (100*worker_ratio)
          + workers_text_stats
          + ' | '
          + f'Current: '
          + f'{dwldcount} links, '
          + f'{filecount} files, '
          + f'{format_power10(bytecount)}'
          + '', end='')
    print(ESC+'[K', flush=True)
    print('Links: '
          + '%8.4f%% | ' % (100*links_ratio)
          + links_text_stats
          + ' | '
          + f'Expected: '
          + '%.3f fl/lnk, ' % (filecount/max(1, dwldcount))
          + f'{int(filecount/max(2**-30, links_ratio))} files, '
          + f'{format_power10(int(bytecount/max(2**-30, links_ratio)))}'
          + '', end='')
    print(ESC+'[K', flush=True)
    # Legend for the recency gradient: numbered shades, newest first.
    print('Latest updates gradient: ', end='')
    bg_rang_programmed_len_digits = len('%d' % (bg_rang_programmed_len+1))
    for i in range(bg_rang_programmed_len+1):
        if i == bg_rang_programmed_len:
            print(' ', end='')
            print('-'*bg_rang_programmed_len_digits, end='')
        else:
            print(bg_rank[i], end='')
            print(' ', end='')
            print(('%%0%dd' % bg_rang_programmed_len_digits) % (i+1), end='')
            print(' ', end='')
    print(colored.attr('reset'), end='')
    print(ESC+'[K', flush=True)
    # One block glyph per job, filled proportionally to completed links.
    print('Overall progress: ', end='')
    for current_job in jobs_sorted:
        current_state = jobs_states[current_job]
        number1color = number1colors.get(current_state, list())
        number2color = number2colors.get(current_state, list())
        current_block_no = ((len(block_sequence)-1)*(
            jobs_queues[current_job]-jobs_enqueueds[current_job]))//max(1, jobs_queues[current_job])
        print(
            colored.stylize(
                block_sequence[current_block_no],
                # Turn the foreground code (38;5;) into a background (48;5;).
                [color.replace("38;5;", "48;5;") for color in number1color]
                + number2color
            ),
            end=''
        )
    print(ESC+'[K', flush=True)
    max_job_size = max([*jobs_sizes.values(), 0])
    per_column = term_sz.columns//(max_job_size+1)
    # Per-state sections, each a grid of "name@[enqueued/]queue" cells.
    for stateelems in state_sequence:
        current_jobs = [job for job in jobs_sorted
                        if jobs_states[job] in stateelems]
        if len(current_jobs) <= 0:
            continue
        print('» ', end='')
        print(' & '.join([
            colored.stylize(stateelem, number1colors[stateelem])
            for stateelem in stateelems
        ]), end='')
        print(' «', end='')
        print(ESC+'[K', flush=True)
        max_job_size = max([0]+[jobs_sizes[job] for job in current_jobs])
        per_column = term_sz.columns//(max_job_size+1)
        current_jobs = sorted(
            current_jobs,
            key=lambda j: (
                jobs_enqueueds[j]/max(0.00000001, jobs_queues[j]),
                jobs_lines[j],)
        )
        while len(current_jobs) > 0:
            for _ in range(per_column):
                if len(current_jobs) > 0:
                    current_job, *current_jobs = current_jobs
                    current_state = jobs_states[current_job]
                    number1color = number1colors.get(current_state, list())
                    number2color = number2colors.get(current_state, list())
                    print(''.join(number1color), end='')
                    print(bg_rank[jobs_daterank[current_job]], end='')
                    print(' '*(max_job_size-jobs_sizes[current_job]), end='')
                    print(current_job, end='')
                    print('@', end='')
                    print(str(jobs_queues[current_job]), end='')
                    if current_state in ('running', 'scrubbing'):
                        print('/', end='')
                        print(''.join(number2color), end='')
                        print(str(jobs_enqueueds[current_job]), end='')
                    print(colored.attr('reset'), end='')
                    print(' ', end='')
            print(ESC+'[K', flush=False)
    print(displayatbottom, end=ESC+'[K', flush=True)
    print(ESC+'[0J', end='', flush=True)
    print(ESC+'[0;0H', end='', flush=True)
    return keep_to_next_cycle


def do_cycle_sleep():
    """Pause between refresh cycles (~60 fps)."""
    sleep(1/60)


def main():
    """Run the dashboard loop, hot-reloading this module each cycle."""
    # Derive this module's dotted import name from its path relative to CWD
    # so importlib.reload() can pick up live code edits.
    selfmodule_path = (Path(__file__)
                       .absolute()
                       .relative_to(Path('.').absolute()))
    selfmodule_name = (
        str(selfmodule_path.parent).replace('/', '.')
        + '.'
        + selfmodule_path.stem)
    selfmodule_name = (
        selfmodule_name[1:]
        if selfmodule_name.startswith('.') else
        selfmodule_name)
    selfmodule = importlib.import_module(selfmodule_name)
    workers_state_path = Path('i_gdl_w')
    from_exc = False
    keep_to_next_cycle = None
    print(ESC+'[2J', end='', flush=True)
    print(ESC+'[0;0H', end='', flush=True)
    while workers_state_path.exists():
        try:
            selfmodule = importlib.reload(selfmodule)
            if from_exc:
                from_exc = False
                print(ESC+'[2J', end='', flush=True)
                print(ESC+'[0;0H', end='', flush=True)
            keep_to_next_cycle = selfmodule.print_terminal(
                workers_state_path, keep_to_next_cycle)
        except KeyboardInterrupt:
            print(ESC+'[2J', end='', flush=True)
            print(ESC+'[0;0H', end='', flush=True)
            raise
        except BaseException:
            # Hot-reload may import broken code; show + persist the trace
            # and keep looping so the next edit can fix it.
            print(ESC+'[2J', end='', flush=True)
            print(ESC+'[0;0H', end='', flush=True)
            traceback.print_exc()
            sio = StringIO()
            traceback.print_exc(file=sio)
            Path('display_fetch_futures.trace').write_text(sio.getvalue())
            from_exc = True
            sleep(1)
        selfmodule.do_cycle_sleep()
    print(ESC+'[0;0H', end='', flush=True)
    print(ESC+'[K', end='', flush=True)
    print('Queue is empty')


if __name__ == "__main__":
    main()