From ec9bac7830989430fb86b60c8c3bab9cd654307a Mon Sep 17 00:00:00 2001 From: Arthur Lu Date: Mon, 21 Sep 2020 05:59:15 +0000 Subject: [PATCH 1/5] superscript.py v 0.8.0 Signed-off-by: Arthur Lu --- data-analysis/superscript.py | 139 +++++++++++++++++++++++------------ 1 file changed, 93 insertions(+), 46 deletions(-) diff --git a/data-analysis/superscript.py b/data-analysis/superscript.py index fdd4e5a5..cb91adbe 100644 --- a/data-analysis/superscript.py +++ b/data-analysis/superscript.py @@ -3,10 +3,13 @@ # Notes: # setup: -__version__ = "0.7.0" +__version__ = "0.8.0" # changelog should be viewed using print(analysis.__changelog__) __changelog__ = """changelog: + 0.8.0: + - added multithreading to matchloop + - tweaked user log 0.7.0: - finished implementing main function 0.6.2: @@ -114,62 +117,72 @@ __all__ = [ from tra_analysis import analysis as an import data as d +from collections import defaultdict import json import numpy as np from os import system, name from pathlib import Path import matplotlib.pyplot as plt +from concurrent.futures import ThreadPoolExecutor import time import warnings +global exec_threads + def main(): + global exec_threads + warnings.filterwarnings("ignore") - while (True): + # while (True): - current_time = time.time() - print("[OK] time: " + str(current_time)) + current_time = time.time() + print("[OK] time: " + str(current_time)) - config = load_config("config.json") - competition = config["competition"] - match_tests = config["statistics"]["match"] - pit_tests = config["statistics"]["pit"] - metrics_tests = config["statistics"]["metric"] - print("[OK] configs loaded") + config = load_config("config.json") + competition = config["competition"] + match_tests = config["statistics"]["match"] + pit_tests = config["statistics"]["pit"] + metrics_tests = config["statistics"]["metric"] + print("[OK] configs loaded") - apikey = config["key"]["database"] - tbakey = config["key"]["tba"] - print("[OK] loaded keys") + print("[OK] starting threads") + exec_threads = ThreadPoolExecutor(max_workers = config["max-threads"]) + print("[OK] threads started") - previous_time = get_previous_time(apikey) - print("[OK] analysis backtimed to: " + str(previous_time)) + apikey = config["key"]["database"] + tbakey = config["key"]["tba"] + print("[OK] loaded keys") - print("[OK] loading data") - start = time.time() - match_data = load_match(apikey, competition) - pit_data = load_pit(apikey, competition) - print("[OK] loaded data in " + str(time.time() - start) + " seconds") + previous_time = get_previous_time(apikey) + print("[OK] analysis backtimed to: " + str(previous_time)) - print("[OK] running tests") - start = time.time() - matchloop(apikey, competition, match_data, match_tests) - print("[OK] finished tests in " + str(time.time() - start) + " seconds") + print("[OK] loading data") + start = time.time() + match_data = load_match(apikey, competition) + pit_data = load_pit(apikey, competition) + print("[OK] loaded data in " + str(time.time() - start) + " seconds") - print("[OK] running metrics") - start = time.time() - metricloop(tbakey, apikey, competition, previous_time, metrics_tests) - print("[OK] finished metrics in " + str(time.time() - start) + " seconds") + print("[OK] running match stats") + start = time.time() + matchloop(apikey, competition, match_data, match_tests) + print("[OK] finished match stats in " + str(time.time() - start) + " seconds") - print("[OK] running pit analysis") - start = time.time() - pitloop(apikey, competition, pit_data, pit_tests) - print("[OK] finished pit analysis in " + str(time.time() - start) + " seconds") - - set_current_time(apikey, current_time) - print("[OK] finished all tests, looping") + print("[OK] running team metrics") + start = time.time() + metricloop(tbakey, apikey, competition, previous_time, metrics_tests) + print("[OK] finished team metrics in " + str(time.time() - start) + " seconds") - clear() + print("[OK] running pit analysis") + start = time.time() + pitloop(apikey, competition, pit_data, pit_tests) + print("[OK] finished pit analysis in " + str(time.time() - start) + " seconds") + + set_current_time(apikey, current_time) + print("[OK] finished all tests, looping") + + #clear() def clear(): @@ -219,12 +232,18 @@ def load_match(apikey, competition): def matchloop(apikey, competition, data, tests): # expects 3D array with [Team][Variable][Match] - def simplestats(data, test): + start = time.time() - data = np.array(data) + global exec_threads + + def simplestats(data_test): + + data = np.array(data_test[0]) data = data[np.isfinite(data)] ranges = list(range(len(data))) + test = data_test[1] + if test == "basic_stats": return an.basic_stats(data) @@ -246,19 +265,47 @@ def matchloop(apikey, competition, data, tests): # expects 3D array with [Team][ if test == "regression_sigmoidal": return an.regression(ranges, data, ['sig']) + class AutoVivification(dict): + def __getitem__(self, item): + try: + return dict.__getitem__(self, item) + except KeyError: + value = self[item] = type(self)() + return value + return_vector = {} + + team_filtered = [] + variable_filtered = [] + variable_data = [] + test_filtered = [] + result_filtered = [] + return_vector = AutoVivification() + for team in data: - variable_vector = {} + for variable in data[team]: - test_vector = {} - variable_data = data[team][variable] + if variable in tests: + for test in tests[variable]: - test_vector[test] = simplestats(variable_data, test) - else: - pass - variable_vector[variable] = test_vector - return_vector[team] = variable_vector + + team_filtered.append(team) + variable_filtered.append(variable) + variable_data.append((data[team][variable], test)) + test_filtered.append(test) + + result_filtered = exec_threads.map(simplestats, variable_data) + i = 0 + + result_filtered = list(result_filtered) + + for result in result_filtered: + + return_vector[team_filtered[i]][variable_filtered[i]][test_filtered[i]] = result + i += 1 + + print("metrics finished in " + str(time.time() - start)) push_match(apikey, competition, return_vector) From adbc749c472c779b6e37c568e1d28eb79c059c76 Mon Sep 17 00:00:00 2001 From: Arthur Lu Date: Mon, 21 Sep 2020 07:21:59 +0000 Subject: [PATCH 2/5] added max-threads key in config Signed-off-by: Arthur Lu --- data-analysis/config.json | 1 + data-analysis/superscript.py | 4 ---- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/data-analysis/config.json b/data-analysis/config.json index eca2d09a..a1715a95 100644 --- a/data-analysis/config.json +++ b/data-analysis/config.json @@ -1,4 +1,5 @@ { + "max-threads": 8, "team": "", "competition": "2020ilch", "key":{ diff --git a/data-analysis/superscript.py b/data-analysis/superscript.py index cb91adbe..ab7eec74 100644 --- a/data-analysis/superscript.py +++ b/data-analysis/superscript.py @@ -232,8 +232,6 @@ def load_match(apikey, competition): def matchloop(apikey, competition, data, tests): # expects 3D array with [Team][Variable][Match] - start = time.time() - global exec_threads def simplestats(data_test): @@ -305,8 +303,6 @@ def matchloop(apikey, competition, data, tests): # expects 3D array with [Team][ return_vector[team_filtered[i]][variable_filtered[i]][test_filtered[i]] = result i += 1 - print("metrics finished in " + str(time.time() - start)) - push_match(apikey, competition, return_vector) def load_metric(apikey, competition, match, group_name, metrics): From 2804d03593c3240343603790aa847a30d218f66c Mon Sep 17 00:00:00 2001 From: Arthur Lu Date: Mon, 21 Sep 2020 07:38:18 +0000 Subject: [PATCH 3/5] superscript.py v 0.8.1 Signed-off-by: Arthur Lu --- data-analysis/config.json | 2 +- data-analysis/superscript.py | 65 +++++++++++++++++++----------------- 2 files changed, 35 insertions(+), 32 deletions(-) diff --git a/data-analysis/config.json b/data-analysis/config.json index a1715a95..c146d833 100644 --- a/data-analysis/config.json +++ b/data-analysis/config.json @@ -1,5 +1,5 @@ { - "max-threads": 8, + "max-threads": 1, "team": "", "competition": "2020ilch", "key":{ diff --git a/data-analysis/superscript.py b/data-analysis/superscript.py index ab7eec74..a73b035d 100644 --- a/data-analysis/superscript.py +++ b/data-analysis/superscript.py @@ -3,10 +3,12 @@ # Notes: # setup: -__version__ = "0.8.0" +__version__ = "0.8.1" # changelog should be viewed using print(analysis.__changelog__) __changelog__ = """changelog: + 0.8.1: + - optimized matchloop further by bypassing GIL 0.8.0: - added multithreading to matchloop - tweaked user log @@ -122,6 +124,7 @@ import json import numpy as np from os import system, name from pathlib import Path +from multiprocessing import Pool import matplotlib.pyplot as plt from concurrent.futures import ThreadPoolExecutor import time @@ -148,7 +151,7 @@ def main(): print("[OK] configs loaded") print("[OK] starting threads") - exec_threads = ThreadPoolExecutor(max_workers = config["max-threads"]) + exec_threads = Pool(processes = config["max-threads"]) print("[OK] threads started") apikey = config["key"]["database"] @@ -230,39 +233,39 @@ def load_match(apikey, competition): return d.get_match_data_formatted(apikey, competition) +def simplestats(data_test): + + data = np.array(data_test[0]) + data = data[np.isfinite(data)] + ranges = list(range(len(data))) + + test = data_test[1] + + if test == "basic_stats": + return an.basic_stats(data) + + if test == "historical_analysis": + return an.histo_analysis([ranges, data]) + + if test == "regression_linear": + return an.regression(ranges, data, ['lin']) + + if test == "regression_logarithmic": + return an.regression(ranges, data, ['log']) + + if test == "regression_exponential": + return an.regression(ranges, data, ['exp']) + + if test == "regression_polynomial": + return an.regression(ranges, data, ['ply']) + + if test == "regression_sigmoidal": + return an.regression(ranges, data, ['sig']) + def matchloop(apikey, competition, data, tests): # expects 3D array with [Team][Variable][Match] global exec_threads - def simplestats(data_test): - - data = np.array(data_test[0]) - data = data[np.isfinite(data)] - ranges = list(range(len(data))) - - test = data_test[1] - - if test == "basic_stats": - return an.basic_stats(data) - - if test == "historical_analysis": - return an.histo_analysis([ranges, data]) - - if test == "regression_linear": - return an.regression(ranges, data, ['lin']) - - if test == "regression_logarithmic": - return an.regression(ranges, data, ['log']) - - if test == "regression_exponential": - return an.regression(ranges, data, ['exp']) - - if test == "regression_polynomial": - return an.regression(ranges, data, ['ply']) - - if test == "regression_sigmoidal": - return an.regression(ranges, data, ['sig']) - class AutoVivification(dict): def __getitem__(self, item): try: From e3241fa34d6c95036ee42d8c84235cf7aea984fd Mon Sep 17 00:00:00 2001 From: Arthur Lu Date: Sat, 26 Sep 2020 20:57:39 +0000 Subject: [PATCH 4/5] superscript.py v 0.8.2 Signed-off-by: Arthur Lu --- data-analysis/config.json | 4 +- data-analysis/superscript.py | 96 +++++++++++++++++++++--------------- 2 files changed, 59 insertions(+), 41 deletions(-) diff --git a/data-analysis/config.json b/data-analysis/config.json index c146d833..f143132b 100644 --- a/data-analysis/config.json +++ b/data-analysis/config.json @@ -1,7 +1,7 @@ { - "max-threads": 1, + "max-threads": 0.5, "team": "", - "competition": "2020ilch", + "competition": "", "key":{ "database":"", "tba":"" diff --git a/data-analysis/superscript.py b/data-analysis/superscript.py index a73b035d..a3f04095 100644 --- a/data-analysis/superscript.py +++ b/data-analysis/superscript.py @@ -3,10 +3,13 @@ # Notes: # setup: -__version__ = "0.8.1" +__version__ = "0.8.2" # changelog should be viewed using print(analysis.__changelog__) __changelog__ = """changelog: + 0.8.2: + - readded while true to main function + - added more thread config options 0.8.1: - optimized matchloop further by bypassing GIL 0.8.0: @@ -121,7 +124,9 @@ from tra_analysis import analysis as an import data as d from collections import defaultdict import json +import math import numpy as np +import os from os import system, name from pathlib import Path from multiprocessing import Pool @@ -138,54 +143,67 @@ def main(): warnings.filterwarnings("ignore") - # while (True): + while (True): - current_time = time.time() - print("[OK] time: " + str(current_time)) + current_time = time.time() + print("[OK] time: " + str(current_time)) - config = load_config("config.json") - competition = config["competition"] - match_tests = config["statistics"]["match"] - pit_tests = config["statistics"]["pit"] - metrics_tests = config["statistics"]["metric"] - print("[OK] configs loaded") + config = load_config("config.json") + competition = config["competition"] + match_tests = config["statistics"]["match"] + pit_tests = config["statistics"]["pit"] + metrics_tests = config["statistics"]["metric"] + print("[OK] configs loaded") - print("[OK] starting threads") - exec_threads = Pool(processes = config["max-threads"]) - print("[OK] threads started") + print("[OK] starting threads") + cfg_max_threads = config["max-threads"] + sys_max_threads = os.cpu_count() + if cfg_max_threads > -sys_max_threads and cfg_max_threads < 0 : + alloc_processes = sys_max_threads + cfg_max_threads + elif cfg_max_threads > 0 and cfg_max_threads < 1: + alloc_processes = math.floor(cfg_max_threads * sys_max_threads) + elif cfg_max_threads > 1 and cfg_max_threads <= sys_max_threads: + alloc_processes = cfg_max_threads + elif cfg_max_threads == 0: + alloc_processes = sys_max_threads + else: + print("[Err] Invalid number of processes, must be between -" + str(sys_max_threads) + " and " + str(sys_max_threads)) + exit() + exec_threads = Pool(processes = alloc_processes) + print("[OK] " + str(alloc_processes) + " threads started") - apikey = config["key"]["database"] - tbakey = config["key"]["tba"] - print("[OK] loaded keys") + apikey = config["key"]["database"] + tbakey = config["key"]["tba"] + print("[OK] loaded keys") - previous_time = get_previous_time(apikey) - print("[OK] analysis backtimed to: " + str(previous_time)) + previous_time = get_previous_time(apikey) + print("[OK] analysis backtimed to: " + str(previous_time)) - print("[OK] loading data") - start = time.time() - match_data = load_match(apikey, competition) - pit_data = load_pit(apikey, competition) - print("[OK] loaded data in " + str(time.time() - start) + " seconds") + print("[OK] loading data") + start = time.time() + match_data = load_match(apikey, competition) + pit_data = load_pit(apikey, competition) + print("[OK] loaded data in " + str(time.time() - start) + " seconds") - print("[OK] running match stats") - start = time.time() - matchloop(apikey, competition, match_data, match_tests) - print("[OK] finished match stats in " + str(time.time() - start) + " seconds") + print("[OK] running match stats") + start = time.time() + matchloop(apikey, competition, match_data, match_tests) + print("[OK] finished match stats in " + str(time.time() - start) + " seconds") - print("[OK] running team metrics") - start = time.time() - metricloop(tbakey, apikey, competition, previous_time, metrics_tests) - print("[OK] finished team metrics in " + str(time.time() - start) + " seconds") + print("[OK] running team metrics") + start = time.time() + metricloop(tbakey, apikey, competition, previous_time, metrics_tests) + print("[OK] finished team metrics in " + str(time.time() - start) + " seconds") - print("[OK] running pit analysis") - start = time.time() - pitloop(apikey, competition, pit_data, pit_tests) - print("[OK] finished pit analysis in " + str(time.time() - start) + " seconds") - - set_current_time(apikey, current_time) - print("[OK] finished all tests, looping") + print("[OK] running pit analysis") + start = time.time() + pitloop(apikey, competition, pit_data, pit_tests) + print("[OK] finished pit analysis in " + str(time.time() - start) + " seconds") + + set_current_time(apikey, current_time) + print("[OK] finished all tests, looping") - #clear() + clear() def clear(): From b32083c6dae885f224cc13b6dc298aba0bafd749 Mon Sep 17 00:00:00 2001 From: Arthur Lu Date: Thu, 24 Sep 2020 13:14:13 +0000 Subject: [PATCH 5/5] added tra-analysis to data-analysis requirements Signed-off-by: Arthur Lu --- data-analysis/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/data-analysis/requirements.txt b/data-analysis/requirements.txt index a87051bc..fb3bb47d 100644 --- a/data-analysis/requirements.txt +++ b/data-analysis/requirements.txt @@ -1,4 +1,5 @@ requests pymongo pandas -dnspython \ No newline at end of file +dnspython +tra-analysis \ No newline at end of file