Merge pull request #46 from titanscouting/multithread-testing

Implement Multithreading in Superscript
This commit is contained in:
Arthur Lu 2020-09-28 17:46:29 -05:00 committed by GitHub
commit 30f5687622
3 changed files with 106 additions and 40 deletions

View File

@ -1,6 +1,7 @@
{ {
"max-threads": 0.5,
"team": "", "team": "",
"competition": "2020ilch", "competition": "",
"key":{ "key":{
"database":"", "database":"",
"tba":"" "tba":""

View File

@ -2,3 +2,4 @@ requests
pymongo pymongo
pandas pandas
dnspython dnspython
tra-analysis

View File

@ -3,10 +3,18 @@
# Notes: # Notes:
# setup: # setup:
__version__ = "0.7.0" __version__ = "0.8.2"
# changelog should be viewed using print(analysis.__changelog__) # changelog should be viewed using print(analysis.__changelog__)
__changelog__ = """changelog: __changelog__ = """changelog:
0.8.2:
- readded while true to main function
- added more thread config options
0.8.1:
- optimized matchloop further by bypassing GIL
0.8.0:
- added multithreading to matchloop
- tweaked user log
0.7.0: 0.7.0:
- finished implementing main function - finished implementing main function
0.6.2: 0.6.2:
@ -114,16 +122,25 @@ __all__ = [
from tra_analysis import analysis as an from tra_analysis import analysis as an
import data as d import data as d
from collections import defaultdict
import json import json
import math
import numpy as np import numpy as np
import os
from os import system, name from os import system, name
from pathlib import Path from pathlib import Path
from multiprocessing import Pool
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from concurrent.futures import ThreadPoolExecutor
import time import time
import warnings import warnings
global exec_threads
def main(): def main():
global exec_threads
warnings.filterwarnings("ignore") warnings.filterwarnings("ignore")
while (True): while (True):
@ -138,6 +155,23 @@ def main():
metrics_tests = config["statistics"]["metric"] metrics_tests = config["statistics"]["metric"]
print("[OK] configs loaded") print("[OK] configs loaded")
print("[OK] starting threads")
cfg_max_threads = config["max-threads"]
sys_max_threads = os.cpu_count()
if cfg_max_threads > -sys_max_threads and cfg_max_threads < 0 :
alloc_processes = sys_max_threads + cfg_max_threads
elif cfg_max_threads > 0 and cfg_max_threads < 1:
alloc_processes = math.floor(cfg_max_threads * sys_max_threads)
elif cfg_max_threads > 1 and cfg_max_threads <= sys_max_threads:
alloc_processes = cfg_max_threads
elif cfg_max_threads == 0:
alloc_processes = sys_max_threads
else:
print("[Err] Invalid number of processes, must be between -" + str(sys_max_threads) + " and " + str(sys_max_threads))
exit()
exec_threads = Pool(processes = alloc_processes)
print("[OK] " + str(alloc_processes) + " threads started")
apikey = config["key"]["database"] apikey = config["key"]["database"]
tbakey = config["key"]["tba"] tbakey = config["key"]["tba"]
print("[OK] loaded keys") print("[OK] loaded keys")
@ -151,15 +185,15 @@ def main():
pit_data = load_pit(apikey, competition) pit_data = load_pit(apikey, competition)
print("[OK] loaded data in " + str(time.time() - start) + " seconds") print("[OK] loaded data in " + str(time.time() - start) + " seconds")
print("[OK] running tests") print("[OK] running match stats")
start = time.time() start = time.time()
matchloop(apikey, competition, match_data, match_tests) matchloop(apikey, competition, match_data, match_tests)
print("[OK] finished tests in " + str(time.time() - start) + " seconds") print("[OK] finished match stats in " + str(time.time() - start) + " seconds")
print("[OK] running metrics") print("[OK] running team metrics")
start = time.time() start = time.time()
metricloop(tbakey, apikey, competition, previous_time, metrics_tests) metricloop(tbakey, apikey, competition, previous_time, metrics_tests)
print("[OK] finished metrics in " + str(time.time() - start) + " seconds") print("[OK] finished team metrics in " + str(time.time() - start) + " seconds")
print("[OK] running pit analysis") print("[OK] running pit analysis")
start = time.time() start = time.time()
@ -217,48 +251,78 @@ def load_match(apikey, competition):
return d.get_match_data_formatted(apikey, competition) return d.get_match_data_formatted(apikey, competition)
# Maps a regression test name to the model code handed to an.regression.
_REGRESSION_KINDS = {
    "regression_linear": "lin",
    "regression_logarithmic": "log",
    "regression_exponential": "exp",
    "regression_polynomial": "ply",
    "regression_sigmoidal": "sig",
}


def simplestats(data_test):
    """Run one named statistical test over one series of values.

    The values and the test name arrive packed in a single argument so that
    this function can be handed directly to a pool ``map`` call, which passes
    exactly one item per invocation.

    Args:
        data_test: A two-item sequence ``(values, test_name)``. ``values`` is
            converted with ``np.array`` and every non-finite entry (NaN,
            +/-inf) is dropped before the test runs. ``test_name`` selects
            the analysis to perform.

    Returns:
        Whatever the selected ``an`` routine returns, or ``None`` when
        ``test_name`` is not one of the recognised tests.
    """
    values, test = data_test[0], data_test[1]

    # Filter first, exactly as before, so malformed input fails the same way
    # regardless of which test was requested.
    data = np.array(values)
    data = data[np.isfinite(data)]
    # Positional indices 0..n-1 serve as the x-axis for the fitted tests.
    ranges = list(range(len(data)))

    if test == "basic_stats":
        return an.basic_stats(data)
    if test == "historical_analysis":
        return an.histo_analysis([ranges, data])

    kind = _REGRESSION_KINDS.get(test)
    if kind is not None:
        return an.regression(ranges, data, [kind])

    # Unknown test name: no result for this (series, test) pair.
    return None
def matchloop(apikey, competition, data, tests): # expects 3D array with [Team][Variable][Match] def matchloop(apikey, competition, data, tests): # expects 3D array with [Team][Variable][Match]
def simplestats(data, test): global exec_threads
data = np.array(data) class AutoVivification(dict):
data = data[np.isfinite(data)] def __getitem__(self, item):
ranges = list(range(len(data))) try:
return dict.__getitem__(self, item)
if test == "basic_stats": except KeyError:
return an.basic_stats(data) value = self[item] = type(self)()
return value
if test == "historical_analysis":
return an.histo_analysis([ranges, data])
if test == "regression_linear":
return an.regression(ranges, data, ['lin'])
if test == "regression_logarithmic":
return an.regression(ranges, data, ['log'])
if test == "regression_exponential":
return an.regression(ranges, data, ['exp'])
if test == "regression_polynomial":
return an.regression(ranges, data, ['ply'])
if test == "regression_sigmoidal":
return an.regression(ranges, data, ['sig'])
return_vector = {} return_vector = {}
team_filtered = []
variable_filtered = []
variable_data = []
test_filtered = []
result_filtered = []
return_vector = AutoVivification()
for team in data: for team in data:
variable_vector = {}
for variable in data[team]: for variable in data[team]:
test_vector = {}
variable_data = data[team][variable]
if variable in tests: if variable in tests:
for test in tests[variable]: for test in tests[variable]:
test_vector[test] = simplestats(variable_data, test)
else: team_filtered.append(team)
pass variable_filtered.append(variable)
variable_vector[variable] = test_vector variable_data.append((data[team][variable], test))
return_vector[team] = variable_vector test_filtered.append(test)
result_filtered = exec_threads.map(simplestats, variable_data)
i = 0
result_filtered = list(result_filtered)
for result in result_filtered:
return_vector[team_filtered[i]][variable_filtered[i]][test_filtered[i]] = result
i += 1
push_match(apikey, competition, return_vector) push_match(apikey, competition, return_vector)