started c-ifying analysis

2026-06-10 09:47:31 +00:00 · 2019-04-05 17:24:24 -05:00
parent b91ad29ae4
commit f49bb58215
8 changed files with 290 additions and 224 deletions
@@ -153,9 +153,11 @@ from sklearn import *
 import time
 import torch

+
 class error(ValueError):
    """Module-specific exception raised for invalid arguments (e.g. an unknown device, method or mode)."""
    pass

+
 def _init_device(setting, arg):  # initiates computation device for ANNs
    if setting == "cuda":
        try:
@@ -170,6 +172,7 @@ def _init_device (setting, arg): #initiates computation device for ANNs
    else:
        raise error("specified device does not exist")

+
 class c_entities:

    c_names = []
@@ -190,7 +193,6 @@ class c_entities:
        self.c_logic = logic
        return None

-
    def append(self, n_name, n_id, n_pos, n_property, n_logic):
        self.c_names.append(n_name)
        self.c_ids.append(n_id)
@@ -232,6 +234,7 @@ class c_entities:
    def regurgitate(self):
        return[self.c_names, self.c_ids, self.c_pos, self.c_properties, self.c_logic]

+
 class nc_entities:

    c_names = []
@@ -295,6 +298,7 @@ class nc_entities:

        return[self.c_names, self.c_ids, self.c_pos, self.c_properties, self.c_effects]

+
 class obstacles:

    c_names = []
@@ -351,6 +355,7 @@ class obstacles:
    def regurgitate(self):
        return[self.c_names, self.c_ids, self.c_perim, self.c_effects]

+
 class objectives:

    c_names = []
@@ -408,13 +413,16 @@ class objectives:
    def regurgitate(self):
        return[self.c_names, self.c_ids, self.c_pos, self.c_effects]

+
 def load_csv(filepath):
    """Read a CSV file and return its rows.

    filepath: path of the CSV file to read.
    returns: list of rows, each row being the list of string fields
    produced by csv.reader.
    """
    # newline='' lets the csv module handle line endings itself; the with
    # block closes the file on exit, so no explicit close() is needed
    with open(filepath, newline='') as csvfile:
        return list(csv.reader(csvfile))

-def basic_stats(data, method, arg): # data=array, mode = ['1d':1d_basic_stats, 'column':c_basic_stats, 'row':r_basic_stats], arg for mode 1 or mode 2 for column or row
+
+# data=array, method = ['1d':1d_basic_stats, 'column':c_basic_stats, 'row':r_basic_stats], arg is the column or row number for the 'column' and 'row' methods
+def basic_stats(data, method, arg):

    if method == 'debug':
        return "basic_stats requires 3 args: data, mode, arg; where data is data to be analyzed, mode is an int from 0 - 2 depending on type of analysis (by column or by row) and is only applicable to 2d arrays (for 1d arrays use mode 1), and arg is row/column number for mode 1 or mode 2; function returns: [mean, median, mode, stdev, variance]"
@@ -498,11 +506,15 @@ def basic_stats(data, method, arg): # data=array, mode = ['1d':1d_basic_stats, '
    else:
        raise error("method error")

-def z_score(point, mean, stdev): #returns z score with inputs of point, mean and standard deviation of spread
+
+# returns z score with inputs of point, mean and standard deviation of spread
+def z_score(point, mean, stdev):
    score = (point - mean) / stdev
    return score

-def z_normalize(x, y, mode): #mode is either 'x' or 'y' or 'both' depending on the variable(s) to be normalized
+
+# mode is either 'x' or 'y' or 'both' depending on the variable(s) to be normalized
+def z_normalize(x, y, mode):

    x_norm = []
    y_norm = []
@@ -543,19 +555,23 @@ def z_normalize(x, y, mode): #mode is either 'x' or 'y' or 'both' depending on t

        return error('method error')

-def stdev_z_split(mean, stdev, delta, low_bound, high_bound): #returns n-th percentile of spread given mean, standard deviation, lower z-score, and upper z-score
+
+# returns the normal probability density for the given mean and standard deviation, sampled from low_bound up to high_bound in steps of delta
+def stdev_z_split(mean, stdev, delta, low_bound, high_bound):

    z_split = []
    i = low_bound

    while True:
-        z_split.append(float((1 / (stdev * math.sqrt(2 * math.pi))) * math.e ** (-0.5 * (((i - mean) / stdev) ** 2))))
+        z_split.append(float((1 / (stdev * math.sqrt(2 * math.pi))) *
+                             math.e ** (-0.5 * (((i - mean) / stdev) ** 2))))
        i = i + delta
        if i > high_bound:
            break

    return z_split

+
 def histo_analysis(hist_data, delta, low_bound, high_bound):

    if hist_data == 'debug':
@@ -593,6 +609,7 @@ def histo_analysis(hist_data, delta, low_bound, high_bound):

    return predictions

+
 def poly_regression(x, y, power):

    if x == "null":  # if x is 'null', then x will be filled with integer points between 1 and the size of y
@@ -607,9 +624,11 @@ def poly_regression(x, y, power):

    for i in range(0, len(reg_eq), 1):
        if i < len(reg_eq) - 1:
-            eq_str = eq_str + str(reg_eq[i]) + "*(z**" + str(len(reg_eq) - i - 1) + ")+"
+            eq_str = eq_str + str(reg_eq[i]) + \
+                "*(z**" + str(len(reg_eq) - i - 1) + ")+"
        else:
-            eq_str = eq_str + str(reg_eq[i]) + "*(z**" + str(len(reg_eq) - i - 1) + ")"
+            eq_str = eq_str + str(reg_eq[i]) + \
+                "*(z**" + str(len(reg_eq) - i - 1) + ")"

    vals = []

@@ -626,18 +645,22 @@ def poly_regression(x, y, power):

    return [eq_str, _rms, r2_d2]

+
 def log_regression(x, y, base):

    x_fit = []

    for i in range(len(x)):
        try:
-			x_fit.append(np.log(x[i]) / np.log(base)) #change of base for logs
+            # change of base for logs
+            x_fit.append(np.log(x[i]) / np.log(base))
        except:
            pass

-	reg_eq = np.polyfit(x_fit, y, 1) # y = reg_eq[0] * log(x, base) + reg_eq[1]
-	q_str = str(reg_eq[0]) + "* (np.log(z) / np.log(" + str(base) +"))+" + str(reg_eq[1])
+    # y = reg_eq[0] * log(x, base) + reg_eq[1]
+    reg_eq = np.polyfit(x_fit, y, 1)
+    q_str = str(reg_eq[0]) + "* (np.log(z) / np.log(" + \
+        str(base) + "))+" + str(reg_eq[1])
    vals = []

    for i in range(len(x)):
@@ -653,18 +676,22 @@ def log_regression(x, y, base):

    return eq_str, _rms, r2_d2

+
 def exp_regression(x, y, base):

    y_fit = []

    for i in range(len(y)):
        try:
-			y_fit.append(np.log(y[i]) / np.log(base)) #change of base for logs
+            # change of base for logs
+            y_fit.append(np.log(y[i]) / np.log(base))
        except:
            pass

-	reg_eq = np.polyfit(x, y_fit, 1, w=np.sqrt(y_fit)) # y = base ^ (reg_eq[0] * x) * base ^ (reg_eq[1])
-	eq_str = "(" + str(base) + "**(" + str(reg_eq[0]) + "*z))*(" + str(base) + "**(" + str(reg_eq[1]) + "))"
+    # y = base ^ (reg_eq[0] * x) * base ^ (reg_eq[1])
+    reg_eq = np.polyfit(x, y_fit, 1, w=np.sqrt(y_fit))
+    eq_str = "(" + str(base) + "**(" + \
+        str(reg_eq[0]) + "*z))*(" + str(base) + "**(" + str(reg_eq[1]) + "))"
    vals = []

    for i in range(len(x)):
@@ -680,6 +707,7 @@ def exp_regression(x, y, base):

    return eq_str, _rms, r2_d2

+
 def tanh_regression(x, y):

    def tanh(x, a, b, c, d):
@@ -687,7 +715,8 @@ def tanh_regression(x, y):
        return a * np.tanh(b * (x - c)) + d

    reg_eq = np.float64(curve_fit(tanh, np.array(x), np.array(y))[0]).tolist()
-	eq_str = str(reg_eq[0]) + " * np.tanh(" + str(reg_eq[1]) + "*(z - " + str(reg_eq[2]) + ")) + " + str(reg_eq[3])
+    eq_str = str(reg_eq[0]) + " * np.tanh(" + str(reg_eq[1]) + \
+        "*(z - " + str(reg_eq[2]) + ")) + " + str(reg_eq[3])
    vals = []

    for i in range(len(x)):
@@ -702,10 +731,12 @@ def tanh_regression(x, y):

    return eq_str, _rms, r2_d2

+
 def r_squared(predictions, targets):  # assumes equal size inputs
    """Return the r^2 score of predictions against targets, as computed by metrics.r2_score."""
    y_true = np.array(targets)
    y_pred = np.array(predictions)
    return metrics.r2_score(y_true, y_pred)

+
 def rms(predictions, targets):  # assumes equal size inputs

    _sum = 0
@@ -715,6 +746,7 @@ def rms(predictions, targets): # assumes equal size inputs

    return float(math.sqrt(_sum / len(targets)))

+
 def calc_overfit(equation, rms_train, r2_train, x_test, y_test):

    # performance overfit = performance(train) - performance(test) where performance is r^2
@@ -733,6 +765,7 @@ def calc_overfit(equation, rms_train, r2_train, x_test, y_test):

    return r2_train - r2_test

+
 def strip_data(data, mode):

    if mode == "adam":  # x is the row number, y are the data
@@ -744,7 +777,9 @@ def strip_data(data, mode):
    else:
        raise error("mode error")

-def optimize_regression(x, y, _range, resolution):#_range in poly regression is the range of powers tried, and in log/exp it is the inverse of the stepsize taken from -1000 to 1000
+
+# _range in poly regression is the range of powers tried, and in log/exp it is the inverse of the stepsize taken from -1000 to 1000
+def optimize_regression(x, y, _range, resolution):
    # usage note: for demonstration purposes only, performance is poor
    if type(resolution) != int:
        raise error("resolution must be int")
@@ -810,7 +845,8 @@ def optimize_regression(x, y, _range, resolution):#_range in poly regression is
    except:
        pass

-    for i in range (0, len(eqs), 1): #marks all equations where r2 = 1 as they 95% of the time overfit the data
+    # marks all equations where r2 = 1 as they 95% of the time overfit the data
+    for i in range(0, len(eqs), 1):
        if r2s[i] == 1:
            eqs[i] = ""
            rmss[i] = ""
@@ -832,6 +868,7 @@ def optimize_regression(x, y, _range, resolution):#_range in poly regression is

    return eqs, rmss, r2s, overfit

+
 def select_best_regression(eqs, rmss, r2s, overfit, selector):

    b_eq = ""
@@ -860,11 +897,14 @@ def select_best_regression(eqs, rmss, r2s, overfit, selector):

    return b_eq, b_rms, b_r2, b_overfit

+
 def p_value(x, y):  # takes 2 1d arrays
    """Return the p-value (element 1 of the stats.ttest_ind result) for samples x and y."""
    test_result = stats.ttest_ind(x, y)
    return test_result[1]

-def basic_analysis(data): #assumes that rows are the independent variable and columns are the dependant. also assumes that time flows from lowest column to highest column.
+
+# assumes that rows are the independent variable and columns are the dependent variable. also assumes that time flows from lowest column to highest column.
+def basic_analysis(data):

    row = len(data)
    column = []
@@ -900,6 +940,7 @@ def benchmark(x, y):

    return [(end_g - start_g), (end_a - start_a)]

+
 def generate_data(filename, x, y, low, high):

    file = open(filename, "w")
@@ -913,9 +954,11 @@ def generate_data(filename, x, y, low, high):
        temp = temp + str(random.uniform(low, high))
        file.write(temp + "\n")

+
 class StatisticsError(ValueError):
    """Raised by the statistics helpers in this module, e.g. for negative values or empty data."""
    pass

+
 def _sum(data, start=0):
    count = 0
    n, d = _exact_ratio(start)
@@ -936,26 +979,35 @@ def _sum(data, start=0):
        total = sum(Fraction(n, d) for d, n in sorted(partials.items()))
    return (T, total, count)

+
 def _isfinite(x):
    try:
        return x.is_finite()  # Likely a Decimal.
    except AttributeError:
        return math.isfinite(x)  # Coerces to float first.

+
 def _coerce(T, S):

    assert T is not bool, "initial type T is bool"

-    if T is S:  return T
+    if T is S:
+        return T

-    if S is int or S is bool:  return T
-    if T is int:  return S
+    if S is int or S is bool:
+        return T
+    if T is int:
+        return S

-    if issubclass(S, T):  return S
-    if issubclass(T, S):  return T
+    if issubclass(S, T):
+        return S
+    if issubclass(T, S):
+        return T

-    if issubclass(T, int):  return S
-    if issubclass(S, int):  return T
+    if issubclass(T, int):
+        return S
+    if issubclass(S, int):
+        return T

    if issubclass(T, Fraction) and issubclass(S, float):
        return S
@@ -965,6 +1017,7 @@ def _coerce(T, S):
    msg = "don't know how to coerce %s and %s"
    raise TypeError(msg % (T.__name__, S.__name__))

+
 def _exact_ratio(x):

    try:
@@ -988,6 +1041,7 @@ def _exact_ratio(x):
    msg = "can't convert type '{}' to numerator/denominator"
    raise TypeError(msg.format(type(x).__name__))

+
 def _convert(value, T):

    if type(value) is T:
@@ -1004,6 +1058,7 @@ def _convert(value, T):
        else:
            raise

+
 def _counts(data):

    table = collections.Counter(iter(data)).most_common()
@@ -1041,6 +1096,7 @@ def _fail_neg(values, errmsg='negative value'):
            raise StatisticsError(errmsg)
        yield x

+
 def mean(data):

    if iter(data) is data:
@@ -1052,6 +1108,7 @@ def mean(data):
    assert count == n
    return _convert(total / n, T)

+
 def median(data):

    data = sorted(data)
@@ -1064,6 +1121,7 @@ def median(data):
        i = n // 2
        return (data[i - 1] + data[i]) / 2

+
 def mode(data):

    table = _counts(data)
@@ -1076,6 +1134,7 @@ def mode(data):
    else:
        raise StatisticsError('no mode for empty data')

+
 def _ss(data, c=None):

    if c is None:
@@ -1088,6 +1147,7 @@ def _ss(data, c=None):
    assert not total < 0, 'negative sum of square deviations: %f' % total
    return (T, total)

+
 def variance(data, xbar=None):

    if iter(data) is data:
@@ -1098,6 +1158,7 @@ def variance(data, xbar=None):
    T, ss = _ss(data, xbar)
    return _convert(ss / (n - 1), T)

+
 def stdev(data, xbar=None):

    var = variance(data, xbar)
@@ -0,0 +1,5 @@
+# Build script: compiles analysis.py into an extension module via Cython.
+# NOTE(review): distutils is deprecated (PEP 632) and removed from the standard
+# library in Python 3.12 -- consider importing setup from setuptools instead.
+from distutils.core import setup
+from Cython.Build import cythonize
+
+# cythonize() converts the listed source into extension modules for setup() to build
+setup(name='analysis',
+      ext_modules=cythonize("analysis.py"))