mirror of
https://github.com/titanscouting/tra-analysis.git
synced 2024-12-27 01:59:08 +00:00
analysis.py - v 1.0.6.002
changelog: - bug fixes
This commit is contained in:
parent
655387df8f
commit
6bfc258e85
Binary file not shown.
@ -8,9 +8,12 @@
|
|||||||
|
|
||||||
#setup:
|
#setup:
|
||||||
|
|
||||||
__version__ = "1.0.6.001"
|
__version__ = "1.0.6.002"
|
||||||
|
|
||||||
|
#changelog should be viewed using print(analysis.__changelog__)
|
||||||
__changelog__ = """changelog:
|
__changelog__ = """changelog:
|
||||||
|
1.0.6.002:
|
||||||
|
- bug fixes
|
||||||
1.0.6.001:
|
1.0.6.001:
|
||||||
- corrected __all__ to contain all of the functions
|
- corrected __all__ to contain all of the functions
|
||||||
1.0.6.000:
|
1.0.6.000:
|
||||||
@ -62,7 +65,7 @@ __changelog__ = """changelog:
|
|||||||
- major bug fixes
|
- major bug fixes
|
||||||
1.0.0.xxx:
|
1.0.0.xxx:
|
||||||
- added loading csv
|
- added loading csv
|
||||||
- added 1d, column, row basic stats""" #changelog should be viewed using print(analysis.__changelog__)
|
- added 1d, column, row basic stats"""
|
||||||
|
|
||||||
__author__ = (
|
__author__ = (
|
||||||
"Arthur Lu <arthurlu@ttic.edu>, "
|
"Arthur Lu <arthurlu@ttic.edu>, "
|
||||||
@ -392,7 +395,6 @@ def basic_stats(data, method, arg): # data=array, mode = ['1d':1d_basic_stats, '
|
|||||||
data_t = []
|
data_t = []
|
||||||
|
|
||||||
for i in range (0, len(data) - 1, 1):
|
for i in range (0, len(data) - 1, 1):
|
||||||
|
|
||||||
data_t.append(float(data[i]))
|
data_t.append(float(data[i]))
|
||||||
|
|
||||||
_mean = mean(data_t)
|
_mean = mean(data_t)
|
||||||
@ -403,19 +405,14 @@ def basic_stats(data, method, arg): # data=array, mode = ['1d':1d_basic_stats, '
|
|||||||
_mode = None
|
_mode = None
|
||||||
try:
|
try:
|
||||||
_stdev = stdev(data_t)
|
_stdev = stdev(data_t)
|
||||||
|
|
||||||
except:
|
except:
|
||||||
|
|
||||||
_stdev = None
|
_stdev = None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
_variance = variance(data_t)
|
_variance = variance(data_t)
|
||||||
except:
|
except:
|
||||||
_variance = None
|
_variance = None
|
||||||
|
|
||||||
out = [_mean, _median, _mode, _stdev, _variance]
|
return [_mean, _median, _mode, _stdev, _variance]
|
||||||
|
|
||||||
return out
|
|
||||||
|
|
||||||
elif method == "column" or method == 1:
|
elif method == "column" or method == 1:
|
||||||
|
|
||||||
@ -443,9 +440,7 @@ def basic_stats(data, method, arg): # data=array, mode = ['1d':1d_basic_stats, '
|
|||||||
except:
|
except:
|
||||||
_variance = None
|
_variance = None
|
||||||
|
|
||||||
out = [_mean, _median, _mode, _stdev, _variance]
|
return [_mean, _median, _mode, _stdev, _variance]
|
||||||
|
|
||||||
return out
|
|
||||||
|
|
||||||
elif method == "row" or method == 2:
|
elif method == "row" or method == 2:
|
||||||
|
|
||||||
@ -469,9 +464,8 @@ def basic_stats(data, method, arg): # data=array, mode = ['1d':1d_basic_stats, '
|
|||||||
except:
|
except:
|
||||||
_variance = None
|
_variance = None
|
||||||
|
|
||||||
out = [_mean, _median, _mode, _stdev, _variance]
|
return [_mean, _median, _mode, _stdev, _variance]
|
||||||
|
|
||||||
return out
|
|
||||||
else:
|
else:
|
||||||
raise error("method error")
|
raise error("method error")
|
||||||
|
|
||||||
@ -482,17 +476,12 @@ def z_score(point, mean, stdev): #returns z score with inputs of point, mean and
|
|||||||
def stdev_z_split(mean, stdev, delta, low_bound, high_bound):
    """Sample the normal probability density on an evenly spaced grid.

    Starting at ``low_bound`` and advancing by ``delta``, the density of the
    normal distribution with the given ``mean`` and ``stdev`` is evaluated at
    each point until the next point would exceed ``high_bound``. The first
    point is always evaluated, even if ``low_bound`` is above ``high_bound``.

    Args:
        mean: Mean of the normal distribution.
        stdev: Standard deviation of the normal distribution (non-zero).
        delta: Positive step between consecutive sample points.
        low_bound: First sample point (a raw value, not a z-score).
        high_bound: Largest value a sample point may take.

    Returns:
        A list of floats, one density value per sample point.

    Raises:
        ValueError: If ``delta`` is not positive; the sampling loop could
            otherwise never reach ``high_bound``.
        ZeroDivisionError: If ``stdev`` is zero.
    """
    # "not delta > 0" also rejects NaN, which would never terminate either.
    if not delta > 0:
        raise ValueError("delta must be positive")

    # The normalising factor does not depend on the sample point.
    coefficient = 1 / (stdev * math.sqrt(2 * math.pi))

    z_split = []
    i = low_bound
    while True:
        z_split.append(float(coefficient * math.e ** (-0.5 * (((i - mean) / stdev) ** 2))))
        i = i + delta
        if i > high_bound:
            break

    return z_split
|
||||||
@ -546,15 +535,12 @@ def histo_analysis(hist_data, delta, low_bound, high_bound):
|
|||||||
i = low_bound
|
i = low_bound
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
|
|
||||||
if i > high_bound:
|
if i > high_bound:
|
||||||
break
|
break
|
||||||
|
|
||||||
try:
|
try:
|
||||||
pred_change = mean_derivative + i * stdev_derivative
|
pred_change = mean_derivative + i * stdev_derivative
|
||||||
|
|
||||||
except:
|
except:
|
||||||
|
|
||||||
pred_change = mean_derivative
|
pred_change = mean_derivative
|
||||||
|
|
||||||
predictions.append(float(hist_data[-1:][0]) + pred_change)
|
predictions.append(float(hist_data[-1:][0]) + pred_change)
|
||||||
@ -566,21 +552,16 @@ def histo_analysis(hist_data, delta, low_bound, high_bound):
|
|||||||
def poly_regression(x, y, power):
|
def poly_regression(x, y, power):
|
||||||
|
|
||||||
if x == "null": #if x is 'null', then x will be filled with integer points between 1 and the size of y
|
if x == "null": #if x is 'null', then x will be filled with integer points between 1 and the size of y
|
||||||
|
|
||||||
x = []
|
x = []
|
||||||
|
|
||||||
for i in range(len(y)):
|
for i in range(len(y)):
|
||||||
|
|
||||||
print(i)
|
print(i)
|
||||||
|
|
||||||
x.append(i+1)
|
x.append(i+1)
|
||||||
|
|
||||||
reg_eq = scipy.polyfit(x, y, deg = power)
|
reg_eq = scipy.polyfit(x, y, deg = power)
|
||||||
|
|
||||||
eq_str = ""
|
eq_str = ""
|
||||||
|
|
||||||
for i in range(0, len(reg_eq), 1):
|
for i in range(0, len(reg_eq), 1):
|
||||||
|
|
||||||
if i < len(reg_eq)- 1:
|
if i < len(reg_eq)- 1:
|
||||||
eq_str = eq_str + str(reg_eq[i]) + "*(z**" + str(len(reg_eq) - i - 1) + ")+"
|
eq_str = eq_str + str(reg_eq[i]) + "*(z**" + str(len(reg_eq) - i - 1) + ")+"
|
||||||
else:
|
else:
|
||||||
@ -590,11 +571,9 @@ def poly_regression(x, y, power):
|
|||||||
|
|
||||||
for i in range(0, len(x), 1):
|
for i in range(0, len(x), 1):
|
||||||
z = x[i]
|
z = x[i]
|
||||||
|
|
||||||
exec("vals.append(" + eq_str + ")")
|
exec("vals.append(" + eq_str + ")")
|
||||||
|
|
||||||
_rms = rms(vals, y)
|
_rms = rms(vals, y)
|
||||||
|
|
||||||
r2_d2 = r_squared(vals, y)
|
r2_d2 = r_squared(vals, y)
|
||||||
|
|
||||||
return [eq_str, _rms, r2_d2]
|
return [eq_str, _rms, r2_d2]
|
||||||
@ -604,23 +583,17 @@ def log_regression(x, y, base):
|
|||||||
x_fit = []
|
x_fit = []
|
||||||
|
|
||||||
for i in range(len(x)):
|
for i in range(len(x)):
|
||||||
|
|
||||||
x_fit.append(np.log(x[i]) / np.log(base)) #change of base for logs
|
x_fit.append(np.log(x[i]) / np.log(base)) #change of base for logs
|
||||||
|
|
||||||
reg_eq = np.polyfit(x_fit, y, 1) # y = reg_eq[0] * log(x, base) + reg_eq[1]
|
reg_eq = np.polyfit(x_fit, y, 1) # y = reg_eq[0] * log(x, base) + reg_eq[1]
|
||||||
|
|
||||||
eq_str = str(reg_eq[0]) + "* (np.log(z) / np.log(" + str(base) +"))+" + str(reg_eq[1])
|
eq_str = str(reg_eq[0]) + "* (np.log(z) / np.log(" + str(base) +"))+" + str(reg_eq[1])
|
||||||
|
|
||||||
vals = []
|
vals = []
|
||||||
|
|
||||||
for i in range(len(x)):
|
for i in range(len(x)):
|
||||||
|
|
||||||
z = x[i]
|
z = x[i]
|
||||||
|
|
||||||
exec("vals.append(" + eq_str + ")")
|
exec("vals.append(" + eq_str + ")")
|
||||||
|
|
||||||
_rms = rms(vals, y)
|
_rms = rms(vals, y)
|
||||||
|
|
||||||
r2_d2 = r_squared(vals, y)
|
r2_d2 = r_squared(vals, y)
|
||||||
|
|
||||||
return [eq_str, _rms, r2_d2]
|
return [eq_str, _rms, r2_d2]
|
||||||
@ -630,23 +603,17 @@ def exp_regression(x, y, base):
|
|||||||
y_fit = []
|
y_fit = []
|
||||||
|
|
||||||
for i in range(len(y)):
|
for i in range(len(y)):
|
||||||
|
|
||||||
y_fit.append(np.log(y[i]) / np.log(base)) #change of base for logs
|
y_fit.append(np.log(y[i]) / np.log(base)) #change of base for logs
|
||||||
|
|
||||||
reg_eq = np.polyfit(x, y_fit, 1, w=np.sqrt(y_fit)) # y = base ^ (reg_eq[0] * x) * base ^ (reg_eq[1])
|
reg_eq = np.polyfit(x, y_fit, 1, w=np.sqrt(y_fit)) # y = base ^ (reg_eq[0] * x) * base ^ (reg_eq[1])
|
||||||
|
|
||||||
eq_str = "(" + str(base) + "**(" + str(reg_eq[0]) + "*z))*(" + str(base) + "**(" + str(reg_eq[1]) + "))"
|
eq_str = "(" + str(base) + "**(" + str(reg_eq[0]) + "*z))*(" + str(base) + "**(" + str(reg_eq[1]) + "))"
|
||||||
|
|
||||||
vals = []
|
vals = []
|
||||||
|
|
||||||
for i in range(len(x)):
|
for i in range(len(x)):
|
||||||
|
|
||||||
z = x[i]
|
z = x[i]
|
||||||
|
|
||||||
exec("vals.append(" + eq_str + ")")
|
exec("vals.append(" + eq_str + ")")
|
||||||
|
|
||||||
_rms = rms(vals, y)
|
_rms = rms(vals, y)
|
||||||
|
|
||||||
r2_d2 = r_squared(vals, y)
|
r2_d2 = r_squared(vals, y)
|
||||||
|
|
||||||
return [eq_str, _rms, r2_d2]
|
return [eq_str, _rms, r2_d2]
|
||||||
@ -660,25 +627,17 @@ def r_squared(predictions, targets): # assumes equal size inputs
|
|||||||
def rms(predictions, targets):
    """Return the root-mean-square error between predictions and targets.

    The squared differences are summed over every index of ``targets`` and
    divided by ``len(targets)`` before taking the square root.

    Args:
        predictions: Indexable sequence of predicted values; must be at least
            as long as ``targets``.
        targets: Indexable sequence of observed values.

    Returns:
        The RMS error as a float.

    Raises:
        ZeroDivisionError: If ``targets`` is empty.
        IndexError: If ``predictions`` is shorter than ``targets``.
    """
    count = len(targets)

    # Accumulate over all points; the previous code overwrote the running
    # value on each iteration, so only the last point contributed.
    squared_error = sum((targets[i] - predictions[i]) ** 2 for i in range(count))

    return float(math.sqrt(squared_error / count))
|
|
||||||
|
|
||||||
def calc_overfit(equation, rms_train, r2_train, x_test, y_test):
|
def calc_overfit(equation, rms_train, r2_train, x_test, y_test):
|
||||||
|
|
||||||
#overfit = performance(train) - performance(test) where performance is r^2
|
#performance overfit = performance(train) - performance(test) where performance is r^2
|
||||||
#overfit = error(train) - error(test) where error is rms
|
#error overfit = error(train) - error(test) where error is rms; biased towards smaller values
|
||||||
|
|
||||||
vals = []
|
vals = []
|
||||||
|
|
||||||
@ -696,107 +655,79 @@ def calc_overfit(equation, rms_train, r2_train, x_test, y_test):
|
|||||||
def strip_data(data, mode):
    """Validate ``mode`` for stripping ``data``; the stripping is a placeholder.

    Both supported modes currently do nothing and the function returns
    ``None``; only the mode check has any effect.

    Args:
        data: Data to strip (currently unused).
        mode: ``"adam"`` (x is the row number, y are the data) or ``"eve"``
            (x are the data, y is the column number).

    Raises:
        error: If ``mode`` is neither ``"adam"`` nor ``"eve"``. ``error`` is
            not defined in this block; presumably it is the module's own
            exception type.
    """
    if mode == "adam":  # x is the row number, y are the data
        pass
    # "elif" so that "adam" no longer falls through to the error branch.
    elif mode == "eve":  # x are the data, y is the column number
        pass
    else:
        raise error("mode error")
|
||||||
|
|
||||||
def optimize_regression(x, y, _range, resolution):
    """Fit many candidate regressions and report how much each one overfits.

    Usage note: for demonstration purposes only; this is a brute-force search
    and is slow.

    A random 40% of the points is held out as a test set. On the remaining
    points the function fits polynomials of degree 0 through ``_range``, then
    exponential and logarithmic regressions for every base ``i / resolution``
    with ``i`` from 1 to ``100 * resolution``. Fits whose r^2 is exactly 1 are
    discarded as likely overfits, and the rest are scored with
    ``calc_overfit`` against the held-out points.

    Args:
        x: Sequence of independent values. It is copied, not modified.
        y: Sequence of dependent values, same length as ``x``. Also copied.
        _range: Highest polynomial degree to try.
        resolution: Integer inverse step size for the exp/log bases.

    Returns:
        A tuple ``(eqs, rmss, r2s, overfit)`` of four parallel lists: the
        equation strings, their training RMS errors, their training r^2
        values, and the ``calc_overfit`` results.

    Raises:
        error: If ``resolution`` is not an int. ``error`` is not defined in
            this block; presumably it is the module's own exception type.
    """
    if not isinstance(resolution, int):
        raise error("resolution must be int")

    # Copy first: the split below pops elements, and the caller's sequences
    # must not be consumed as a side effect.
    x_train = list(x)
    y_train = list(y)
    x_test = []
    y_test = []

    # Move a random 40% of the points from the training set to the test set.
    for _ in range(math.floor(len(x_train) * 0.4)):
        index = random.randint(0, len(x_train) - 1)
        x_test.append(x_train.pop(index))
        y_test.append(y_train.pop(index))

    # Each candidate is the [equation, rms, r2] result of one regression,
    # computed once so the three values always stay aligned.
    candidates = []

    for power in range(_range + 1):
        candidates.append(poly_regression(x_train, y_train, power))

    for fit in (exp_regression, log_regression):
        for step in range(1, 100 * resolution + 1):
            try:
                candidates.append(fit(x_train, y_train, float(step / resolution)))
            except Exception:
                # Best effort: bases the fit cannot handle are skipped.
                continue

    # Drop fits with r^2 == 1, which almost always overfit the data.
    kept = [candidate for candidate in candidates if candidate[2] != 1]

    eqs = [candidate[0] for candidate in kept]
    rmss = [candidate[1] for candidate in kept]
    r2s = [candidate[2] for candidate in kept]

    overfit = [
        calc_overfit(eq, rms_train, r2_train, x_test, y_test)
        for eq, rms_train, r2_train in zip(eqs, rmss, r2s)
    ]

    return eqs, rmss, r2s, overfit
|
||||||
@ -809,7 +740,6 @@ def basic_analysis(filepath): #assumes that rows are the independent variable an
|
|||||||
column = []
|
column = []
|
||||||
|
|
||||||
for i in range(0, row, 1):
|
for i in range(0, row, 1):
|
||||||
|
|
||||||
column.append(len(data[i]))
|
column.append(len(data[i]))
|
||||||
|
|
||||||
column_max = max(column)
|
column_max = max(column)
|
||||||
@ -844,11 +774,9 @@ def generate_data(filename, x, y, low, high):
|
|||||||
file = open(filename, "w")
|
file = open(filename, "w")
|
||||||
|
|
||||||
for i in range (0, y, 1):
|
for i in range (0, y, 1):
|
||||||
|
|
||||||
temp = ""
|
temp = ""
|
||||||
|
|
||||||
for j in range (0, x - 1, 1):
|
for j in range (0, x - 1, 1):
|
||||||
|
|
||||||
temp = str(random.uniform(low, high)) + "," + temp
|
temp = str(random.uniform(low, high)) + "," + temp
|
||||||
|
|
||||||
temp = temp + str(random.uniform(low, high))
|
temp = temp + str(random.uniform(low, high))
|
||||||
@ -906,18 +834,15 @@ def debug():
|
|||||||
print("--------------------------------")
|
print("--------------------------------")
|
||||||
|
|
||||||
print(poly_regression([1, 2, 3, 4, 5], [1, 2, 4, 8, 16], 2))
|
print(poly_regression([1, 2, 3, 4, 5], [1, 2, 4, 8, 16], 2))
|
||||||
|
|
||||||
print(log_regression([1, 2, 3, 4], [2, 4, 8, 16], 2.717))
|
print(log_regression([1, 2, 3, 4], [2, 4, 8, 16], 2.717))
|
||||||
|
|
||||||
print(exp_regression([1, 2, 3, 4], [2, 4, 8, 16], 2.717))
|
print(exp_regression([1, 2, 3, 4], [2, 4, 8, 16], 2.717))
|
||||||
|
|
||||||
x, y, z = optimize_regression([0, 1, 2, 3, 4], [1, 2, 4, 7, 19], 10, 100)
|
x, y, z = optimize_regression([0, 1, 2, 3, 4], [1, 2, 4, 7, 19], 10, 100)
|
||||||
|
|
||||||
for i in range(0, len(x), 1):
|
for i in range(0, len(x), 1):
|
||||||
|
|
||||||
print(str(x[i]) + " | " + str(y[i]) + " | " + str(z[i]))
|
print(str(x[i]) + " | " + str(y[i]) + " | " + str(z[i]))
|
||||||
|
|
||||||
#statistics def below------------------------------------------------------------------------------------------------------------------------------------------------------
|
#statistics def below
|
||||||
|
|
||||||
class StatisticsError(ValueError):
    """Exception type for statistics errors; a subclass of ValueError."""
|
||||||
|
Loading…
Reference in New Issue
Block a user