From 911423f87924591fdec669f7ec0fc12ee38a34d7 Mon Sep 17 00:00:00 2001
From: Arthur Lu <learthurgo@gmail.com>
Date: Mon, 21 Sep 2020 05:59:15 +0000
Subject: [PATCH 1/5] superscript.py v 0.8.0

Signed-off-by: Arthur Lu <learthurgo@gmail.com>
---
 data-analysis/superscript.py | 139 +++++++++++++++++++++++------------
 1 file changed, 93 insertions(+), 46 deletions(-)

diff --git a/data-analysis/superscript.py b/data-analysis/superscript.py
index fdd4e5a5..cb91adbe 100644
--- a/data-analysis/superscript.py
+++ b/data-analysis/superscript.py
@@ -3,10 +3,13 @@
 # Notes:
 # setup:
 
-__version__ = "0.7.0"
+__version__ = "0.8.0"
 
 # changelog should be viewed using print(analysis.__changelog__)
 __changelog__ = """changelog:
+	0.8.0:
+		- added multithreading to matchloop
+		- tweaked user log
 	0.7.0:
 		- finished implementing main function
 	0.6.2:
@@ -114,62 +117,72 @@ __all__ = [
 
 from tra_analysis import analysis as an
 import data as d
+from collections import defaultdict
 import json
 import numpy as np
 from os import system, name
 from pathlib import Path
 import matplotlib.pyplot as plt
+from concurrent.futures import ThreadPoolExecutor
 import time
 import warnings
 
+global exec_threads
+
 def main():
 
+	global exec_threads
+
 	warnings.filterwarnings("ignore")
 
-	while (True):
+	# while (True):
 
-		current_time = time.time()
-		print("[OK] time: " + str(current_time))
+	current_time = time.time()
+	print("[OK] time: " + str(current_time))
 
-		config = load_config("config.json")
-		competition = config["competition"]
-		match_tests = config["statistics"]["match"]
-		pit_tests = config["statistics"]["pit"]
-		metrics_tests = config["statistics"]["metric"]
-		print("[OK] configs loaded")
+	config = load_config("config.json")
+	competition = config["competition"]
+	match_tests = config["statistics"]["match"]
+	pit_tests = config["statistics"]["pit"]
+	metrics_tests = config["statistics"]["metric"]
+	print("[OK] configs loaded")
 
-		apikey = config["key"]["database"]
-		tbakey = config["key"]["tba"]
-		print("[OK] loaded keys")
+	print("[OK] starting threads")
+	exec_threads = ThreadPoolExecutor(max_workers = config["max-threads"])
+	print("[OK] threads started")
 
-		previous_time = get_previous_time(apikey)
-		print("[OK] analysis backtimed to: " + str(previous_time))
+	apikey = config["key"]["database"]
+	tbakey = config["key"]["tba"]
+	print("[OK] loaded keys")
 
-		print("[OK] loading data")
-		start = time.time()
-		match_data = load_match(apikey, competition)
-		pit_data = load_pit(apikey, competition)
-		print("[OK] loaded data in " + str(time.time() - start) + " seconds")
+	previous_time = get_previous_time(apikey)
+	print("[OK] analysis backtimed to: " + str(previous_time))
 
-		print("[OK] running tests")
-		start = time.time()
-		matchloop(apikey, competition, match_data, match_tests)
-		print("[OK] finished tests in " + str(time.time() - start) + " seconds")
+	print("[OK] loading data")
+	start = time.time()
+	match_data = load_match(apikey, competition)
+	pit_data = load_pit(apikey, competition)
+	print("[OK] loaded data in " + str(time.time() - start) + " seconds")
 
-		print("[OK] running metrics")
-		start = time.time()
-		metricloop(tbakey, apikey, competition, previous_time, metrics_tests)
-		print("[OK] finished metrics in " + str(time.time() - start) + " seconds")
+	print("[OK] running match stats")
+	start = time.time()
+	matchloop(apikey, competition, match_data, match_tests)
+	print("[OK] finished match stats in " + str(time.time() - start) + " seconds")
 
-		print("[OK] running pit analysis")
-		start = time.time()
-		pitloop(apikey, competition, pit_data, pit_tests)
-		print("[OK] finished pit analysis in " + str(time.time() - start) + " seconds")
-		
-		set_current_time(apikey, current_time)
-		print("[OK] finished all tests, looping")
+	print("[OK] running team metrics")
+	start = time.time()
+	metricloop(tbakey, apikey, competition, previous_time, metrics_tests)
+	print("[OK] finished team metrics in " + str(time.time() - start) + " seconds")
 
-		clear()
+	print("[OK] running pit analysis")
+	start = time.time()
+	pitloop(apikey, competition, pit_data, pit_tests)
+	print("[OK] finished pit analysis in " + str(time.time() - start) + " seconds")
+	
+	set_current_time(apikey, current_time)
+	print("[OK] finished all tests, looping")
+
+	#clear()
 
 def clear(): 
 	
@@ -219,12 +232,18 @@ def load_match(apikey, competition):
 
 def matchloop(apikey, competition, data, tests): # expects 3D array with [Team][Variable][Match]
 
-	def simplestats(data, test):
+	start = time.time()
 
-		data = np.array(data)
+	global exec_threads
+
+	def simplestats(data_test):
+
+		data = np.array(data_test[0])
 		data = data[np.isfinite(data)]
 		ranges = list(range(len(data)))
 
+		test = data_test[1]
+
 		if test == "basic_stats":
 			return an.basic_stats(data)
 
@@ -246,19 +265,47 @@ def matchloop(apikey, competition, data, tests): # expects 3D array with [Team][
 		if test == "regression_sigmoidal":
 			return an.regression(ranges, data, ['sig'])
 
+	class AutoVivification(dict):
+		def __getitem__(self, item):
+			try:
+				return dict.__getitem__(self, item)
+			except KeyError:
+				value = self[item] = type(self)()
+				return value
+
 	return_vector = {}
+	
+	team_filtered = []
+	variable_filtered = []
+	variable_data = []
+	test_filtered = []
+	result_filtered = []
+	return_vector = AutoVivification()
+
 	for team in data:
-		variable_vector = {}
+
 		for variable in data[team]:
-			test_vector = {}
-			variable_data = data[team][variable]
+
 			if variable in tests:
+
 				for test in tests[variable]:
-					test_vector[test] = simplestats(variable_data, test)
-			else:
-				pass      
-			variable_vector[variable] = test_vector
-		return_vector[team] = variable_vector
+
+					team_filtered.append(team)
+					variable_filtered.append(variable)
+					variable_data.append((data[team][variable], test))
+					test_filtered.append(test)
+
+	result_filtered = exec_threads.map(simplestats, variable_data)
+	i = 0
+
+	result_filtered = list(result_filtered)
+
+	for result in result_filtered:
+
+		return_vector[team_filtered[i]][variable_filtered[i]][test_filtered[i]] = result
+		i += 1
+
+	print("metrics finished in " + str(time.time() - start))
 
 	push_match(apikey, competition, return_vector)
 

From c048f850c0cd9c38074ad05dd2fe9d145acae19a Mon Sep 17 00:00:00 2001
From: Arthur Lu <learthurgo@gmail.com>
Date: Mon, 21 Sep 2020 07:21:59 +0000
Subject: [PATCH 2/5] added max-threads key in config

Signed-off-by: Arthur Lu <learthurgo@gmail.com>
---
 data-analysis/config.json    | 1 +
 data-analysis/superscript.py | 4 ----
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/data-analysis/config.json b/data-analysis/config.json
index eca2d09a..a1715a95 100644
--- a/data-analysis/config.json
+++ b/data-analysis/config.json
@@ -1,4 +1,5 @@
 {
+	"max-threads": 8,
 	"team": "",
 	"competition": "2020ilch",
 	"key":{
diff --git a/data-analysis/superscript.py b/data-analysis/superscript.py
index cb91adbe..ab7eec74 100644
--- a/data-analysis/superscript.py
+++ b/data-analysis/superscript.py
@@ -232,8 +232,6 @@ def load_match(apikey, competition):
 
 def matchloop(apikey, competition, data, tests): # expects 3D array with [Team][Variable][Match]
 
-	start = time.time()
-
 	global exec_threads
 
 	def simplestats(data_test):
@@ -305,8 +303,6 @@ def matchloop(apikey, competition, data, tests): # expects 3D array with [Team][
 		return_vector[team_filtered[i]][variable_filtered[i]][test_filtered[i]] = result
 		i += 1
 
-	print("metrics finished in " + str(time.time() - start))
-
 	push_match(apikey, competition, return_vector)
 
 def load_metric(apikey, competition, match, group_name, metrics):

From f9fd61e8a5c4637b006920b920f1ebb90ed62f47 Mon Sep 17 00:00:00 2001
From: Arthur Lu <learthurgo@gmail.com>
Date: Mon, 21 Sep 2020 07:38:18 +0000
Subject: [PATCH 3/5] superscript.py v 0.8.1

Signed-off-by: Arthur Lu <learthurgo@gmail.com>
---
 data-analysis/config.json    |  2 +-
 data-analysis/superscript.py | 65 +++++++++++++++++++-----------------
 2 files changed, 35 insertions(+), 32 deletions(-)

diff --git a/data-analysis/config.json b/data-analysis/config.json
index a1715a95..c146d833 100644
--- a/data-analysis/config.json
+++ b/data-analysis/config.json
@@ -1,5 +1,5 @@
 {
-	"max-threads": 8,
+	"max-threads": 1,
 	"team": "",
 	"competition": "2020ilch",
 	"key":{
diff --git a/data-analysis/superscript.py b/data-analysis/superscript.py
index ab7eec74..a73b035d 100644
--- a/data-analysis/superscript.py
+++ b/data-analysis/superscript.py
@@ -3,10 +3,12 @@
 # Notes:
 # setup:
 
-__version__ = "0.8.0"
+__version__ = "0.8.1"
 
 # changelog should be viewed using print(analysis.__changelog__)
 __changelog__ = """changelog:
+	0.8.1:
+		- optimized matchloop further by bypassing GIL
 	0.8.0:
 		- added multithreading to matchloop
 		- tweaked user log
@@ -122,6 +124,7 @@ import json
 import numpy as np
 from os import system, name
 from pathlib import Path
+from multiprocessing import Pool
 import matplotlib.pyplot as plt
 from concurrent.futures import ThreadPoolExecutor
 import time
@@ -148,7 +151,7 @@ def main():
 	print("[OK] configs loaded")
 
 	print("[OK] starting threads")
-	exec_threads = ThreadPoolExecutor(max_workers = config["max-threads"])
+	exec_threads = Pool(processes = config["max-threads"])
 	print("[OK] threads started")
 
 	apikey = config["key"]["database"]
@@ -230,39 +233,39 @@ def load_match(apikey, competition):
 
 	return d.get_match_data_formatted(apikey, competition)
 
+def simplestats(data_test):
+
+	data = np.array(data_test[0])
+	data = data[np.isfinite(data)]
+	ranges = list(range(len(data)))
+
+	test = data_test[1]
+
+	if test == "basic_stats":
+		return an.basic_stats(data)
+
+	if test == "historical_analysis":
+		return an.histo_analysis([ranges, data])
+
+	if test == "regression_linear":
+		return an.regression(ranges, data, ['lin'])
+
+	if test == "regression_logarithmic":
+		return an.regression(ranges, data, ['log'])
+
+	if test == "regression_exponential":
+		return an.regression(ranges, data, ['exp'])
+
+	if test == "regression_polynomial":
+		return an.regression(ranges, data, ['ply'])
+
+	if test == "regression_sigmoidal":
+		return an.regression(ranges, data, ['sig'])
+
 def matchloop(apikey, competition, data, tests): # expects 3D array with [Team][Variable][Match]
 
 	global exec_threads
 
-	def simplestats(data_test):
-
-		data = np.array(data_test[0])
-		data = data[np.isfinite(data)]
-		ranges = list(range(len(data)))
-
-		test = data_test[1]
-
-		if test == "basic_stats":
-			return an.basic_stats(data)
-
-		if test == "historical_analysis":
-			return an.histo_analysis([ranges, data])
-
-		if test == "regression_linear":
-			return an.regression(ranges, data, ['lin'])
-
-		if test == "regression_logarithmic":
-			return an.regression(ranges, data, ['log'])
-
-		if test == "regression_exponential":
-			return an.regression(ranges, data, ['exp'])
-
-		if test == "regression_polynomial":
-			return an.regression(ranges, data, ['ply'])
-
-		if test == "regression_sigmoidal":
-			return an.regression(ranges, data, ['sig'])
-
 	class AutoVivification(dict):
 		def __getitem__(self, item):
 			try:

From d97976da12fc724d5a0863346c8c5137364f79e8 Mon Sep 17 00:00:00 2001
From: Arthur Lu <learthurgo@gmail.com>
Date: Sat, 26 Sep 2020 20:57:39 +0000
Subject: [PATCH 4/5] superscript.py v 0.8.2

Signed-off-by: Arthur Lu <learthurgo@gmail.com>
---
 data-analysis/config.json    |  4 +-
 data-analysis/superscript.py | 96 +++++++++++++++++++++---------------
 2 files changed, 59 insertions(+), 41 deletions(-)

diff --git a/data-analysis/config.json b/data-analysis/config.json
index c146d833..f143132b 100644
--- a/data-analysis/config.json
+++ b/data-analysis/config.json
@@ -1,7 +1,7 @@
 {
-	"max-threads": 1,
+	"max-threads": 0.5,
 	"team": "",
-	"competition": "2020ilch",
+	"competition": "",
 	"key":{
 		"database":"",
 		"tba":""
diff --git a/data-analysis/superscript.py b/data-analysis/superscript.py
index a73b035d..a3f04095 100644
--- a/data-analysis/superscript.py
+++ b/data-analysis/superscript.py
@@ -3,10 +3,13 @@
 # Notes:
 # setup:
 
-__version__ = "0.8.1"
+__version__ = "0.8.2"
 
 # changelog should be viewed using print(analysis.__changelog__)
 __changelog__ = """changelog:
+	0.8.2:
+		- re-added the while True loop to the main function
+		- added more thread config options
 	0.8.1:
 		- optimized matchloop further by bypassing GIL
 	0.8.0:
@@ -121,7 +124,9 @@ from tra_analysis import analysis as an
 import data as d
 from collections import defaultdict
 import json
+import math
 import numpy as np
+import os
 from os import system, name
 from pathlib import Path
 from multiprocessing import Pool
@@ -138,54 +143,67 @@ def main():
 
 	warnings.filterwarnings("ignore")
 
-	# while (True):
+	while (True):
 
-	current_time = time.time()
-	print("[OK] time: " + str(current_time))
+		current_time = time.time()
+		print("[OK] time: " + str(current_time))
 
-	config = load_config("config.json")
-	competition = config["competition"]
-	match_tests = config["statistics"]["match"]
-	pit_tests = config["statistics"]["pit"]
-	metrics_tests = config["statistics"]["metric"]
-	print("[OK] configs loaded")
+		config = load_config("config.json")
+		competition = config["competition"]
+		match_tests = config["statistics"]["match"]
+		pit_tests = config["statistics"]["pit"]
+		metrics_tests = config["statistics"]["metric"]
+		print("[OK] configs loaded")
 
-	print("[OK] starting threads")
-	exec_threads = Pool(processes = config["max-threads"])
-	print("[OK] threads started")
+		print("[OK] starting threads")
+		cfg_max_threads = config["max-threads"]
+		sys_max_threads = os.cpu_count()
+		if cfg_max_threads > -sys_max_threads and cfg_max_threads < 0 :
+			alloc_processes = sys_max_threads + cfg_max_threads
+		elif cfg_max_threads > 0 and cfg_max_threads < 1:
+			alloc_processes = math.floor(cfg_max_threads * sys_max_threads)
+		elif cfg_max_threads > 1 and cfg_max_threads <= sys_max_threads:
+			alloc_processes = cfg_max_threads
+		elif cfg_max_threads == 0:
+			alloc_processes = sys_max_threads
+		else:
+			print("[Err] Invalid number of processes, must be between -" + str(sys_max_threads) + " and " + str(sys_max_threads))
+			exit()
+		exec_threads = Pool(processes = alloc_processes)
+		print("[OK] " + str(alloc_processes) + " threads started")
 
-	apikey = config["key"]["database"]
-	tbakey = config["key"]["tba"]
-	print("[OK] loaded keys")
+		apikey = config["key"]["database"]
+		tbakey = config["key"]["tba"]
+		print("[OK] loaded keys")
 
-	previous_time = get_previous_time(apikey)
-	print("[OK] analysis backtimed to: " + str(previous_time))
+		previous_time = get_previous_time(apikey)
+		print("[OK] analysis backtimed to: " + str(previous_time))
 
-	print("[OK] loading data")
-	start = time.time()
-	match_data = load_match(apikey, competition)
-	pit_data = load_pit(apikey, competition)
-	print("[OK] loaded data in " + str(time.time() - start) + " seconds")
+		print("[OK] loading data")
+		start = time.time()
+		match_data = load_match(apikey, competition)
+		pit_data = load_pit(apikey, competition)
+		print("[OK] loaded data in " + str(time.time() - start) + " seconds")
 
-	print("[OK] running match stats")
-	start = time.time()
-	matchloop(apikey, competition, match_data, match_tests)
-	print("[OK] finished match stats in " + str(time.time() - start) + " seconds")
+		print("[OK] running match stats")
+		start = time.time()
+		matchloop(apikey, competition, match_data, match_tests)
+		print("[OK] finished match stats in " + str(time.time() - start) + " seconds")
 
-	print("[OK] running team metrics")
-	start = time.time()
-	metricloop(tbakey, apikey, competition, previous_time, metrics_tests)
-	print("[OK] finished team metrics in " + str(time.time() - start) + " seconds")
+		print("[OK] running team metrics")
+		start = time.time()
+		metricloop(tbakey, apikey, competition, previous_time, metrics_tests)
+		print("[OK] finished team metrics in " + str(time.time() - start) + " seconds")
 
-	print("[OK] running pit analysis")
-	start = time.time()
-	pitloop(apikey, competition, pit_data, pit_tests)
-	print("[OK] finished pit analysis in " + str(time.time() - start) + " seconds")
-	
-	set_current_time(apikey, current_time)
-	print("[OK] finished all tests, looping")
+		print("[OK] running pit analysis")
+		start = time.time()
+		pitloop(apikey, competition, pit_data, pit_tests)
+		print("[OK] finished pit analysis in " + str(time.time() - start) + " seconds")
+		
+		set_current_time(apikey, current_time)
+		print("[OK] finished all tests, looping")
 
-	#clear()
+		clear()
 
 def clear(): 
 	

From b9ffac5b202a5368babfb7c8ea08a6165ac8c6ac Mon Sep 17 00:00:00 2001
From: Arthur Lu <learthurgo@gmail.com>
Date: Thu, 24 Sep 2020 13:14:13 +0000
Subject: [PATCH 5/5] added tra-analysis to data-analysis requirements

Signed-off-by: Arthur Lu <learthurgo@gmail.com>
---
 data-analysis/requirements.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/data-analysis/requirements.txt b/data-analysis/requirements.txt
index a87051bc..fb3bb47d 100644
--- a/data-analysis/requirements.txt
+++ b/data-analysis/requirements.txt
@@ -1,4 +1,5 @@
 requests
 pymongo
 pandas
-dnspython
\ No newline at end of file
+dnspython
+tra-analysis
\ No newline at end of file