Merge pull request #46 from titanscouting/multithread-testing

Implement Multithreading in Superscript
2026-01-29 14:21:03 +00:00 · 2020-09-28 17:46:29 -05:00
parent 88f68782f7 b9ffac5b20
commit 30f5687622
3 changed files with 106 additions and 40 deletions
--- a/data-analysis/config.json
+++ b/data-analysis/config.json
@@ -1,6 +1,7 @@
 {
 	"max-threads": 0.5,
 	"team": "",
-	"competition": "2020ilch",
+	"competition": "",
 	"key":{
 		"database":"",
 		"tba":""
--- a/data-analysis/requirements.txt
+++ b/data-analysis/requirements.txt
@@ -2,3 +2,4 @@ requests
 pymongo
 pandas
 dnspython
 tra-analysis
--- a/data-analysis/superscript.py
+++ b/data-analysis/superscript.py
@@ -3,10 +3,18 @@
 # Notes:
 # setup:
-__version__ = "0.7.0"
+__version__ = "0.8.2"
 # changelog should be viewed using print(analysis.__changelog__)
 __changelog__ = """changelog:
 	0.8.2:
 		- readded while true to main function
 		- added more thread config options
 	0.8.1:
 		- optimized matchloop further by bypassing GIL
 	0.8.0:
 		- added multithreading to matchloop
 		- tweaked user log
 	0.7.0:
 		- finished implementing main function
 	0.6.2:
@@ -114,16 +122,25 @@ __all__ = [
 from tra_analysis import analysis as an
 import data as d
 from collections import defaultdict
 import json
 import math
 import numpy as np
 import os
 from os import system, name
 from pathlib import Path
 from multiprocessing import Pool
 import matplotlib.pyplot as plt
 from concurrent.futures import ThreadPoolExecutor
 import time
 import warnings
 global exec_threads
 def main():
 	global exec_threads
 	warnings.filterwarnings("ignore")
 	while (True):
@@ -138,6 +155,23 @@ def main():
 		metrics_tests = config["statistics"]["metric"]
 		print("[OK] configs loaded")
 		print("[OK] starting threads")
 		cfg_max_threads = config["max-threads"]
 		sys_max_threads = os.cpu_count()
 		if cfg_max_threads > -sys_max_threads and cfg_max_threads < 0 :
 			alloc_processes = sys_max_threads + cfg_max_threads
 		elif cfg_max_threads > 0 and cfg_max_threads < 1:
 			alloc_processes = math.floor(cfg_max_threads * sys_max_threads)
 		elif cfg_max_threads > 1 and cfg_max_threads <= sys_max_threads:
 			alloc_processes = cfg_max_threads
 		elif cfg_max_threads == 0:
 			alloc_processes = sys_max_threads
 		else:
 			print("[Err] Invalid number of processes, must be between -" + str(sys_max_threads) + " and " + str(sys_max_threads))
 			exit()
 		exec_threads = Pool(processes = alloc_processes)
 		print("[OK] " + str(alloc_processes) + " threads started")
 		apikey = config["key"]["database"]
 		tbakey = config["key"]["tba"]
 		print("[OK] loaded keys")
@@ -151,15 +185,15 @@ def main():
 		pit_data = load_pit(apikey, competition)
 		print("[OK] loaded data in " + str(time.time() - start) + " seconds")
-		print("[OK] running tests")
+		print("[OK] running match stats")
 		start = time.time()
 		matchloop(apikey, competition, match_data, match_tests)
-		print("[OK] finished tests in " + str(time.time() - start) + " seconds")
+		print("[OK] finished match stats in " + str(time.time() - start) + " seconds")
-		print("[OK] running metrics")
+		print("[OK] running team metrics")
 		start = time.time()
 		metricloop(tbakey, apikey, competition, previous_time, metrics_tests)
-		print("[OK] finished metrics in " + str(time.time() - start) + " seconds")
+		print("[OK] finished team metrics in " + str(time.time() - start) + " seconds")
 		print("[OK] running pit analysis")
 		start = time.time()
@@ -217,14 +251,14 @@ def load_match(apikey, competition):
 	return d.get_match_data_formatted(apikey, competition)
-def matchloop(apikey, competition, data, tests): # expects 3D array with [Team][Variable][Match]
+def simplestats(data_test):
-	def simplestats(data, test):
+	data = np.array(data_test[0])
 		data = np.array(data)
 	data = data[np.isfinite(data)]
 	ranges = list(range(len(data)))
 	test = data_test[1]
 	if test == "basic_stats":
 		return an.basic_stats(data)
@@ -246,19 +280,49 @@ def matchloop(apikey, competition, data, tests): # expects 3D array with [Team][
 	if test == "regression_sigmoidal":
 		return an.regression(ranges, data, ['sig'])
 def matchloop(apikey, competition, data, tests): # expects 3D array with [Team][Variable][Match]
 	global exec_threads
 	class AutoVivification(dict):
 		def __getitem__(self, item):
 			try:
 				return dict.__getitem__(self, item)
 			except KeyError:
 				value = self[item] = type(self)()
 				return value
 	return_vector = {}
 	team_filtered = []
 	variable_filtered = []
 	variable_data = []
 	test_filtered = []
 	result_filtered = []
 	return_vector = AutoVivification()
 	for team in data:
-		variable_vector = {}
+
 		for variable in data[team]:
-			test_vector = {}
+
 			variable_data = data[team][variable]
 			if variable in tests:
 				for test in tests[variable]:
-					test_vector[test] = simplestats(variable_data, test)
+
-			else:
+					team_filtered.append(team)
-				pass      
+					variable_filtered.append(variable)
-			variable_vector[variable] = test_vector
+					variable_data.append((data[team][variable], test))
-		return_vector[team] = variable_vector
+					test_filtered.append(test)
 	result_filtered = exec_threads.map(simplestats, variable_data)
 	i = 0
 	result_filtered = list(result_filtered)
 	for result in result_filtered:
 		return_vector[team_filtered[i]][variable_filtered[i]][test_filtered[i]] = result
 		i += 1
 	push_match(apikey, competition, return_vector)