From b2f2dfe2a4989da5c62c2895eae73e9000760901 Mon Sep 17 00:00:00 2001
From: Arthur Lu
Date: Sat, 12 Jun 2021 23:57:16 +0000
Subject: [PATCH] finished refactor

---
 Review notes (commentary only; git ignores everything between the "---"
 line above and the first "diff --git" line).

 Purpose of the patch: split superscript.py into dataset.py (database
 getters/setters), processing.py (simplestats, matchloop, metricloop,
 pitloop) and a looping main() in superscript.py.

 Reconstruction:
  * The line structure of this patch had been collapsed; it is rebuilt here
    and the number of "+", "-" and context lines matches every hunk header.
  * Indentation was lost as well. Tabs are assumed - TODO confirm against the
    repository. If context lines do not match, apply with
    "git apply --ignore-whitespace".
  * Every fix below replaces a line one-for-one, so hunk headers and the
    diffstat are unchanged. The "index" blob ids still describe the
    original content.

 Fixes applied:
  * dataset.get_previous_time: the first-run path stored a bare 0, while
    set_current_time stores {"latest_update": ...} and the read path indexes
    ["latest_update"]. It now writes the same shape.
  * dataset.load_metric: key "sigm+a" corrected to "sigma"; the already
    fetched db_data is reused instead of querying twice; the "metrics"
    parameter is no longer overwritten inside the loop (later teams without
    stored data were losing the configured defaults).
  * processing.py: load_metric is now imported; metricloop calls it and
    raised NameError on the first match.
  * superscript.main: max-threads == 1 fell through to the error branch
    ("> 1" is now ">= 1"); the code 110 message concatenated a number to a
    string and raised TypeError (now wrapped in str()).

 Not changed, needs follow-up (NOTE(review), would alter line counts):
  * main() creates a new Pool on every loop iteration and never closes it
    on the normal path.
  * load_config uses a bare "except:", so a malformed config is overwritten
    with the sample; on that path the read handle is also left open.
  * metricloop is passed current_time rather than previous_time as the
    timestamp - confirm which one pull_new_tba_matches expects.
  * metricloop divides by len(red) / len(blu); an empty alliance would raise
    ZeroDivisionError.

 src/cli/dataset.py     |  74 +++++++++++++++
 src/cli/interface.py   |   4 +-
 src/cli/processing.py  | 191 ++++++++++++++++++++++++++++++++++++++
 src/cli/superscript.py | 204 +++++++++++++++++++++++++++++++++++++----
 4 files changed, 455 insertions(+), 18 deletions(-)
 create mode 100644 src/cli/dataset.py
 create mode 100644 src/cli/processing.py

diff --git a/src/cli/dataset.py b/src/cli/dataset.py
new file mode 100644
index 0000000..5a69cc9
--- /dev/null
+++ b/src/cli/dataset.py
@@ -0,0 +1,74 @@
+import data as d
+
+def get_previous_time(apikey):
+
+	previous_time = d.get_analysis_flags(apikey, "latest_update")
+
+	if previous_time is None:
+
+		d.set_analysis_flags(apikey, "latest_update", {"latest_update": 0}) # same shape set_current_time writes
+		previous_time = 0
+
+	else:
+
+		previous_time = previous_time["latest_update"]
+
+	return previous_time
+
+def set_current_time(apikey, current_time):
+
+	d.set_analysis_flags(apikey, "latest_update", {"latest_update":current_time})
+
+def load_match(apikey, competition):
+
+	return d.get_match_data_formatted(apikey, competition)
+
+def load_metric(apikey, competition, match, group_name, metrics):
+
+	group = {}
+
+	for team in match[group_name]:
+
+		db_data = d.get_team_metrics_data(apikey, competition, team)
+
+		if db_data is None: # no stored metrics for this team yet, seed from the configured defaults
+
+			elo = {"score": metrics["elo"]["score"]}
+			gl2 = {"score": metrics["gl2"]["score"], "rd": metrics["gl2"]["rd"], "vol": metrics["gl2"]["vol"]}
+			ts = {"mu": metrics["ts"]["mu"], "sigma": metrics["ts"]["sigma"]}
+
+			group[team] = {"elo": elo, "gl2": gl2, "ts": ts}
+
+		else:
+
+			team_metrics = db_data["metrics"] # separate name: "metrics" must keep the defaults for later teams
+
+			elo = team_metrics["elo"]
+			gl2 = team_metrics["gl2"]
+			ts = team_metrics["ts"]
+
+			group[team] = {"elo": elo, "gl2": gl2, "ts": ts}
+
+	return group
+
+def load_pit(apikey, competition):
+
+	return d.get_pit_data_formatted(apikey, competition)
+
+def push_match(apikey, competition, results):
+
+	for team in results:
+
+		d.push_team_tests_data(apikey, competition, team, results[team])
+
+def push_metric(apikey, competition, metric):
+
+	for team in metric:
+
+		d.push_team_metrics_data(apikey, competition, team, metric[team])
+
+def push_pit(apikey, competition, pit):
+
+	for variable in pit:
+
+		d.push_team_pit_data(apikey, competition, variable, pit[variable])
\ No newline at end of file
diff --git a/src/cli/interface.py b/src/cli/interface.py
index 9fd5765..9ee821a 100644
--- a/src/cli/interface.py
+++ b/src/cli/interface.py
@@ -5,7 +5,7 @@ import platform
 
 empty_delim = " "
 hard_divided_delim = "|"
-soft_divided_delim = ":"
+soft_divided_delim = "|"
 l_brack = "["
 r_brack = "]"
 
@@ -17,7 +17,7 @@ stderr = sys.stderr
 
 def log(target, level, message, code = 0):
 
-	message = time.ctime() + empty_delim + str(level) + l_brack + str(code) + r_brack + empty_delim + soft_divided_delim + empty_delim + message
+	message = time.ctime() + empty_delim + str(level) + l_brack + f"{code:04}" + r_brack + empty_delim + soft_divided_delim + empty_delim + message
 	print(message, file = target)
 
 def clear():
diff --git a/src/cli/processing.py b/src/cli/processing.py
new file mode 100644
index 0000000..1f7e322
--- /dev/null
+++ b/src/cli/processing.py
@@ -0,0 +1,191 @@
+import numpy as np
+
+from tra_analysis import Analysis as an
+from dataset import push_metric, load_metric # load_metric is called by metricloop
+from data import pull_new_tba_matches
+
+def simplestats(data_test):
+
+	data = np.array(data_test[0])
+	data = data[np.isfinite(data)]
+	ranges = list(range(len(data)))
+
+	test = data_test[1]
+
+	if test == "basic_stats":
+		return an.basic_stats(data)
+
+	if test == "historical_analysis":
+		return an.histo_analysis([ranges, data])
+
+	if test == "regression_linear":
+		return an.regression(ranges, data, ['lin'])
+
+	if test == "regression_logarithmic":
+		return an.regression(ranges, data, ['log'])
+
+	if test == "regression_exponential":
+		return an.regression(ranges, data, ['exp'])
+
+	if test == "regression_polynomial":
+		return an.regression(ranges, data, ['ply'])
+
+	if test == "regression_sigmoidal":
+		return an.regression(ranges, data, ['sig'])
+
+def matchloop(apikey, competition, data, tests, exec_threads):
+
+	short_mapping = {"regression_linear": "lin", "regression_logarithmic": "log", "regression_exponential": "exp", "regression_polynomial": "ply", "regression_sigmoidal": "sig"}
+
+	class AutoVivification(dict):
+		def __getitem__(self, item):
+			try:
+				return dict.__getitem__(self, item)
+			except KeyError:
+				value = self[item] = type(self)()
+				return value
+
+	return_vector = {}
+
+	team_filtered = []
+	variable_filtered = []
+	variable_data = []
+	test_filtered = []
+	result_filtered = []
+	return_vector = AutoVivification()
+
+	for team in data:
+
+		for variable in data[team]:
+
+			if variable in tests:
+
+				for test in tests[variable]:
+
+					team_filtered.append(team)
+					variable_filtered.append(variable)
+					variable_data.append((data[team][variable], test))
+					test_filtered.append(test)
+
+	result_filtered = exec_threads.map(simplestats, variable_data)
+	i = 0
+
+	result_filtered = list(result_filtered)
+
+	for result in result_filtered:
+
+		filtered = test_filtered[i]
+
+		try:
+			short = short_mapping[filtered]
+			return_vector[team_filtered[i]][variable_filtered[i]][test_filtered[i]] = result[short]
+		except KeyError: # not in mapping
+			return_vector[team_filtered[i]][variable_filtered[i]][test_filtered[i]] = result
+		i += 1
+
+	return return_vector
+
+def metricloop(tbakey, apikey, competition, timestamp, metrics): # listener based metrics update
+
+	elo_N = metrics["elo"]["N"]
+	elo_K = metrics["elo"]["K"]
+
+	matches = pull_new_tba_matches(tbakey, competition, timestamp)
+
+	red = {}
+	blu = {}
+
+	for match in matches:
+
+		red = load_metric(apikey, competition, match, "red", metrics)
+		blu = load_metric(apikey, competition, match, "blue", metrics)
+
+		elo_red_total = 0
+		elo_blu_total = 0
+
+		gl2_red_score_total = 0
+		gl2_blu_score_total = 0
+
+		gl2_red_rd_total = 0
+		gl2_blu_rd_total = 0
+
+		gl2_red_vol_total = 0
+		gl2_blu_vol_total = 0
+
+		for team in red:
+
+			elo_red_total += red[team]["elo"]["score"]
+
+			gl2_red_score_total += red[team]["gl2"]["score"]
+			gl2_red_rd_total += red[team]["gl2"]["rd"]
+			gl2_red_vol_total += red[team]["gl2"]["vol"]
+
+		for team in blu:
+
+			elo_blu_total += blu[team]["elo"]["score"]
+
+			gl2_blu_score_total += blu[team]["gl2"]["score"]
+			gl2_blu_rd_total += blu[team]["gl2"]["rd"]
+			gl2_blu_vol_total += blu[team]["gl2"]["vol"]
+
+		red_elo = {"score": elo_red_total / len(red)}
+		blu_elo = {"score": elo_blu_total / len(blu)}
+
+		red_gl2 = {"score": gl2_red_score_total / len(red), "rd": gl2_red_rd_total / len(red), "vol": gl2_red_vol_total / len(red)}
+		blu_gl2 = {"score": gl2_blu_score_total / len(blu), "rd": gl2_blu_rd_total / len(blu), "vol": gl2_blu_vol_total / len(blu)}
+
+
+		if match["winner"] == "red":
+
+			observations = {"red": 1, "blu": 0}
+
+		elif match["winner"] == "blue":
+
+			observations = {"red": 0, "blu": 1}
+
+		else:
+
+			observations = {"red": 0.5, "blu": 0.5}
+
+		red_elo_delta = an.Metric().elo(red_elo["score"], blu_elo["score"], observations["red"], elo_N, elo_K) - red_elo["score"]
+		blu_elo_delta = an.Metric().elo(blu_elo["score"], red_elo["score"], observations["blu"], elo_N, elo_K) - blu_elo["score"]
+
+		new_red_gl2_score, new_red_gl2_rd, new_red_gl2_vol = an.Metric().glicko2(red_gl2["score"], red_gl2["rd"], red_gl2["vol"], [blu_gl2["score"]], [blu_gl2["rd"]], [observations["red"], observations["blu"]])
+		new_blu_gl2_score, new_blu_gl2_rd, new_blu_gl2_vol = an.Metric().glicko2(blu_gl2["score"], blu_gl2["rd"], blu_gl2["vol"], [red_gl2["score"]], [red_gl2["rd"]], [observations["blu"], observations["red"]])
+
+		red_gl2_delta = {"score": new_red_gl2_score - red_gl2["score"], "rd": new_red_gl2_rd - red_gl2["rd"], "vol": new_red_gl2_vol - red_gl2["vol"]}
+		blu_gl2_delta = {"score": new_blu_gl2_score - blu_gl2["score"], "rd": new_blu_gl2_rd - blu_gl2["rd"], "vol": new_blu_gl2_vol - blu_gl2["vol"]}
+
+		for team in red:
+
+			red[team]["elo"]["score"] = red[team]["elo"]["score"] + red_elo_delta
+
+			red[team]["gl2"]["score"] = red[team]["gl2"]["score"] + red_gl2_delta["score"]
+			red[team]["gl2"]["rd"] = red[team]["gl2"]["rd"] + red_gl2_delta["rd"]
+			red[team]["gl2"]["vol"] = red[team]["gl2"]["vol"] + red_gl2_delta["vol"]
+
+		for team in blu:
+
+			blu[team]["elo"]["score"] = blu[team]["elo"]["score"] + blu_elo_delta
+
+			blu[team]["gl2"]["score"] = blu[team]["gl2"]["score"] + blu_gl2_delta["score"]
+			blu[team]["gl2"]["rd"] = blu[team]["gl2"]["rd"] + blu_gl2_delta["rd"]
+			blu[team]["gl2"]["vol"] = blu[team]["gl2"]["vol"] + blu_gl2_delta["vol"]
+
+		temp_vector = {}
+		temp_vector.update(red)
+		temp_vector.update(blu)
+
+		push_metric(apikey, competition, temp_vector)
+
+def pitloop(apikey, competition, pit, tests):
+
+	return_vector = {}
+	for team in pit:
+		for variable in pit[team]:
+			if variable in tests:
+				if not variable in return_vector:
+					return_vector[variable] = []
+				return_vector[variable].append(pit[team][variable])
+
+	return return_vector
\ No newline at end of file
diff --git a/src/cli/superscript.py b/src/cli/superscript.py
index 09ef513..0cbe4a0 100644
--- a/src/cli/superscript.py
+++ b/src/cli/superscript.py
@@ -11,6 +11,8 @@ __changelog__ = """changelog:
 		- moved printing and logging related functions to interface.py (changelog will stay in this file)
 		- changed function return files for load_config and save_config to standard C values (0 for success, 1 for error)
 		- added local variables for config location
+		- moved dataset getting and setting functions to dataset.py (changelog will stay in this file)
+		- moved matchloop, metricloop, pitloop and helper functions (simplestats) to processing.py
 	0.8.6:
 		- added proper main function
 	0.8.5:
@@ -121,23 +123,22 @@ __author__ = (
 __all__ = [
 	"load_config",
 	"save_config",
-	"get_previous_time",
-	"load_match",
-	"matchloop",
-	"load_metric",
-	"metricloop",
-	"load_pit",
-	"pitloop",
-	"push_match",
-	"push_metric",
-	"push_pit",
 ]
 
 # imports:
 
 import json
+import multiprocessing
+import os
+import math
+from multiprocessing import Pool
+import time
+import warnings
+import sys
 
-from cli_interface import splash, log, ERR, INF, stdout, stderr
+from interface import splash, log, ERR, INF, stdout, stderr
+from dataset import get_previous_time, set_current_time, load_match, push_match, load_metric, push_metric, load_pit, push_pit
+from processing import matchloop, metricloop, pitloop
 
 config_path = "config.json"
 sample_json = """{
@@ -189,21 +190,190 @@ sample_json = """{
 
 def main():
 
+	warnings.filterwarnings("ignore")
+	sys.stderr = open("errorlog.txt", "w")
+
 	splash(__version__)
 
+	loop_exit_code = 0
+	loop_stored_exception = None
+
+	while True:
+
+		try:
+
+			loop_start = time.time()
+
+			current_time = time.time()
+			log(stdout, INF, "current time: " + str(current_time))
+
+			config = {}
+			if load_config(config_path, config) == 1:
+				exit(1)
+
+			error_flag = False
+
+			try:
+				competition = config["competition"]
+			except:
+				log(stderr, ERR, "could not find competition field in config", code = 101)
+				error_flag = True
+			try:
+				match_tests = config["statistics"]["match"]
+			except:
+				log(stderr, ERR, "could not find match_tests field in config", code = 102)
+				error_flag = True
+			try:
+				metrics_tests = config["statistics"]["metric"]
+			except:
+				log(stderr, ERR, "could not find metrics_tests field in config", code = 103)
+				error_flag = True
+			try:
+				pit_tests = config["statistics"]["pit"]
+			except:
+				log(stderr, ERR, "could not find pit_tests field in config", code = 104)
+				error_flag = True
+
+			if error_flag:
+				exit(1)
+			error_flag = False
+
+			if competition == None or competition == "":
+				log(stderr, ERR, "competition field in config must not be empty", code = 105)
+				error_flag = True
+			if match_tests == None:
+				log(stderr, ERR, "match_tests field in config must not be empty", code = 106)
+				error_flag = True
+			if metrics_tests == None:
+				log(stderr, ERR, "metrics_tests field in config must not be empty", code = 107)
+				error_flag = True
+			if pit_tests == None:
+				log(stderr, ERR, "pit_tests field in config must not be empty", code = 108)
+				error_flag = True
+
+			if error_flag:
+				exit(1)
+
+			log(stdout, INF, "found and loaded competition, match_tests, metrics_tests, pit_tests from config")
+
+			sys_max_threads = os.cpu_count()
+			try:
+				cfg_max_threads = config["max-threads"]
+			except:
+				log(stderr, ERR, "max-threads field in config must not be empty, refer to documentation for configuration options", code = 109)
+				exit(1)
+
+			if cfg_max_threads > -sys_max_threads and cfg_max_threads < 0 :
+				alloc_processes = sys_max_threads + cfg_max_threads
+			elif cfg_max_threads > 0 and cfg_max_threads < 1:
+				alloc_processes = math.floor(cfg_max_threads * sys_max_threads)
+			elif cfg_max_threads >= 1 and cfg_max_threads <= sys_max_threads: # >= so that exactly 1 is accepted
+				alloc_processes = cfg_max_threads
+			elif cfg_max_threads == 0:
+				alloc_processes = sys_max_threads
+			else:
+				log(stderr, ERR, "max-threads must be between -" + str(sys_max_threads) + " and " + str(sys_max_threads) + ", but got " + str(cfg_max_threads), code = 110)
+				exit(1)
+
+			log(stdout, INF, "found and loaded max-threads from config")
+			log(stdout, INF, "attempting to start " + str(alloc_processes) + " threads")
+			try:
+				exec_threads = Pool(processes = alloc_processes)
+			except Exception as e:
+				log(stderr, ERR, "unable to start threads", code = 200)
+				log(stderr, INF, e)
+				exit(1)
+			log(stdout, INF, "successfully initialized " + str(alloc_processes) + " threads")
+
+			exit_flag = False
+
+			try:
+				apikey = config["key"]["database"]
+			except:
+				log(stderr, ERR, "database key field in config must not be empty, please populate the database key", code = 111)
+				exit_flag = True
+			try:
+				tbakey = config["key"]["tba"]
+			except:
+				log(stderr, ERR, "tba key field in config must not be empty, please populate the tba key", code = 112)
+				exit_flag = True
+
+			if exit_flag:
+				exit(1)
+
+			log(stdout, INF, "found and loaded database and tba keys")
+
+			previous_time = get_previous_time(apikey)
+			log(stdout, INF, "analysis backtimed to: " + str(previous_time))
+
+			start = time.time()
+			log(stdout, INF, "loading match data")
+			match_data = load_match(apikey, competition)
+			log(stdout, INF, "finished loading match data in " + str(time.time() - start) + " seconds")
+
+			start = time.time()
+			log(stdout, INF, "performing analysis on match data")
+			results = matchloop(apikey, competition, match_data, match_tests, exec_threads)
+			log(stdout, INF, "finished match analysis in " + str(time.time() - start) + " seconds")
+
+			start = time.time()
+			log(stdout, INF, "uploading match results to database")
+			push_match(apikey, competition, results)
+			log(stdout, INF, "finished uploading match results in " + str(time.time() - start) + " seconds")
+
+			start = time.time()
+			log(stdout, INF, "performing analysis on team metrics")
+			results = metricloop(tbakey, apikey, competition, current_time, metrics_tests)
+			log(stdout, INF, "finished metric analysis and pushed to database in " + str(time.time() - start) + " seconds")
+
+			start = time.time()
+			log(stdout, INF, "loading pit data")
+			pit_data = load_pit(apikey, competition)
+			log(stdout, INF, "finished loading pit data in " + str(time.time() - start) + " seconds")
+
+			start = time.time()
+			log(stdout, INF, "performing analysis on pit data")
+			results = pitloop(apikey, competition, pit_data, pit_tests)
+			log(stdout, INF, "finished pit analysis in " + str(time.time() - start) + " seconds")
+
+			start = time.time()
+			log(stdout, INF, "uploading pit results to database")
+			push_pit(apikey, competition, results)
+			log(stdout, INF, "finished uploading pit results in " + str(time.time() - start) + " seconds")
+
+			set_current_time(apikey, current_time)
+			log(stdout, INF, "finished all tests in " + str(time.time() - loop_start) + " seconds, looping")
+
+		except KeyboardInterrupt:
+			log(stdout, INF, "detected KeyboardInterrupt, killing threads")
+			if "exec_threads" in locals():
+				exec_threads.terminate()
+				exec_threads.close()
+			log(stdout, INF, "terminated threads, exiting")
+			loop_stored_exception = sys.exc_info()
+			loop_exit_code = 0
+			break
+		except Exception as e:
+			log(stderr, ERR, "encountered an exception while running")
+			print(e, file = stderr)
+			loop_exit_code = 1
+			break
+
+	sys.exit(loop_exit_code)
+
 def load_config(path, config_vector):
 	try:
 		f = open(path, "r")
+		config_vector.update(json.load(f))
+		f.close()
+		log(stdout, INF, "found and opened config at <" + path + ">")
+		return 0
 	except:
-		log(stderr, ERR, "could not find config at <" + path + ">, generating blank config and exiting")
+		log(stderr, ERR, "could not find config at <" + path + ">, generating blank config and exiting", code = 100)
 		f = open(path, "w")
 		f.write(sample_json)
 		f.close()
 		return 1
-
-	config_vector = json.load(f)
-	f.close()
-	return 0
 
 def save_config(path, config_vector):
 	try:
@@ -215,4 +385,6 @@ def save_config(path, config_vector):
 		return 1
 
 if __name__ == "__main__":
+	if sys.platform.startswith("win"):
+		multiprocessing.freeze_support()
 	main()
\ No newline at end of file