Merge pull request #28 from titanscout2022/master-staged

Merge analysis.py updates to master
This commit is contained in:
Arthur Lu 2020-07-12 18:26:03 -05:00 committed by GitHub
commit f062c038ec
3 changed files with 480 additions and 4 deletions

View File

@ -7,10 +7,26 @@
# current benchmark of optimization: 1.33 times faster # current benchmark of optimization: 1.33 times faster
# setup: # setup:
__version__ = "1.2.1.004" __version__ = "1.2.2.000"
# changelog should be viewed using print(analysis.__changelog__) # changelog should be viewed using print(analysis.__changelog__)
__changelog__ = """changelog: __changelog__ = """changelog:
1.2.2.000:
- added Sort class
- added several array sorting functions to Sort class including:
- quick sort
- merge sort
- intro(spective) sort
- heap sort
- insertion sort
- tim sort
- selection sort
- bubble sort
- cycle sort
- cocktail sort
- tested all sorting algorithms with both lists and numpy arrays
- depreciated sort function from Array class
- added warnings as an import
1.2.1.004: 1.2.1.004:
- added sort and search functions to Array class - added sort and search functions to Array class
1.2.1.003: 1.2.1.003:
@ -340,6 +356,7 @@ from scipy import optimize, stats
import sklearn import sklearn
from sklearn import preprocessing, pipeline, linear_model, metrics, cluster, decomposition, tree, neighbors, naive_bayes, svm, model_selection, ensemble from sklearn import preprocessing, pipeline, linear_model, metrics, cluster, decomposition, tree, neighbors, naive_bayes, svm, model_selection, ensemble
from analysis.metrics import trueskill as Trueskill from analysis.metrics import trueskill as Trueskill
import warnings
class error(ValueError): class error(ValueError):
pass pass
@ -1035,7 +1052,8 @@ class Array(): # tests on nd arrays independent of basic_stats
return np.cross(a, b) return np.cross(a, b)
def sort(self, array): def sort(self, array): # depreciated
warnings.warn("Array.sort has been depreciated in favor of Sort")
array_length = len(array) array_length = len(array)
if array_length <= 1: if array_length <= 1:
return array return array
@ -1076,4 +1094,394 @@ class Array(): # tests on nd arrays independent of basic_stats
else: else:
return binary_search(arr, mid + 1, high, x) return binary_search(arr, mid + 1, high, x)
else: else:
return -1 return -1
class Sort: # if you haven't used a sort, then you've never lived
    """Collection of classic comparison sorts, each sorting in ascending order.

    The methods are exercised with plain lists and one-dimensional numpy
    arrays. ``quicksort`` builds and returns a new numpy array; every other
    method rearranges its argument in place and returns that same object.
    """

    def quicksort(self, a):
        """Return a new numpy array holding the elements of ``a`` in order.

        Three-way partitioning (less / equal / greater) is driven by an
        explicit work stack rather than recursion, so already-sorted or
        otherwise unlucky inputs cannot exhaust the interpreter's recursion
        limit. The input itself is left untouched.
        """
        ordered = []
        # Each entry is (already_ordered, chunk); chunks are popped in
        # ascending order of the values they hold.
        pending = [(False, list(a))]
        while pending:
            done, chunk = pending.pop()
            if done or len(chunk) <= 1:
                ordered.extend(chunk)
                continue
            # The middle element keeps sorted input from degrading to O(n^2).
            pivot = chunk[len(chunk) // 2]
            less, equal, greater = [], [], []
            for x in chunk:
                if x < pivot:
                    less.append(x)
                elif x == pivot:
                    equal.append(x)
                elif x > pivot:
                    greater.append(x)
            # Pushed in reverse so that `less` is handled first.
            pending.append((False, greater))
            pending.append((True, equal))
            pending.append((False, less))
        return np.array(ordered)

    def mergesort(self, a):
        """Sort ``a`` in place with a top-down merge sort and return it."""
        def sort(array):
            if len(array) > 1:
                middle = len(array) // 2
                # Copy both halves: slicing a numpy array yields views, and
                # merging back into `array` would otherwise overwrite the
                # very values still waiting to be merged.
                left = list(array[:middle])
                right = list(array[middle:])
                sort(left)
                sort(right)
                i = j = k = 0
                while i < len(left) and j < len(right):
                    # Take from the left half on ties to keep the sort stable.
                    if right[j] < left[i]:
                        array[k] = right[j]
                        j += 1
                    else:
                        array[k] = left[i]
                        i += 1
                    k += 1
                while i < len(left):
                    array[k] = left[i]
                    i += 1
                    k += 1
                while j < len(right):
                    array[k] = right[j]
                    j += 1
                    k += 1
            return array
        return sort(a)

    def introsort(self, a):
        """Sort ``a`` in place with introspective sort and return it.

        Quicksort with Hoare partitioning is used until the recursion budget
        ``2 * floor(log2(len(a)))`` is spent; the remaining range is then
        finished with heap sort. Empty and single-element inputs are
        returned unchanged.
        """
        def swap(array, i, j):
            array[i], array[j] = array[j], array[i]

        def max_heapify(array, index, start, end):
            # Sift `index` (relative to `start`) down inside array[start:end].
            size = end - start
            l = 2 * index + 1
            r = 2 * index + 2
            largest = index
            if l < size and array[start + l] > array[start + index]:
                largest = l
            if r < size and array[start + r] > array[start + largest]:
                largest = r
            if largest != index:
                swap(array, start + largest, start + index)
                max_heapify(array, largest, start, end)

        def heapsort(array, start, end):
            # Build a max heap over array[start:end], then repeatedly move
            # the maximum to the end of the shrinking range.
            index = (end - start - 2) // 2
            while index >= 0:
                max_heapify(array, index, start, end)
                index -= 1
            for i in range(end - 1, start, -1):
                swap(array, start, i)
                max_heapify(array, index=0, start=start, end=i)

        def partition(array, start, end):
            # Hoare partition around the first element; `end` is exclusive.
            pivot = array[start]
            i = start - 1
            j = end
            while True:
                i += 1
                while array[i] < pivot:
                    i += 1
                j -= 1
                while array[j] > pivot:
                    j -= 1
                if i >= j:
                    return j
                swap(array, i, j)

        def sort(array, start, end, maxdepth):
            if end - start <= 1:
                return
            if maxdepth == 0:
                heapsort(array, start, end)
                return
            p = partition(array, start, end)
            sort(array, start, p + 1, maxdepth - 1)
            sort(array, p + 1, end, maxdepth - 1)

        sort(a, 0, len(a), (len(a).bit_length() - 1) * 2)
        # Always hand back the input, including for lengths 0 and 1.
        return a

    def heapsort(self, a):
        """Sort ``a`` in place with heap sort and return it."""
        def sift_down(array, size, root):
            # Restore the max-heap property below `root` within array[:size].
            while True:
                largest = root
                left = 2 * root + 1
                right = 2 * root + 2
                if left < size and array[root] < array[left]:
                    largest = left
                if right < size and array[largest] < array[right]:
                    largest = right
                if largest == root:
                    return
                array[root], array[largest] = array[largest], array[root]
                root = largest

        n = len(a)
        for i in range(n // 2 - 1, -1, -1):
            sift_down(a, n, i)
        for i in range(n - 1, 0, -1):
            a[i], a[0] = a[0], a[i]
            sift_down(a, i, 0)
        return a

    def insertionsort(self, a):
        """Sort ``a`` in place with insertion sort and return it."""
        for i in range(1, len(a)):
            key = a[i]
            j = i - 1
            # Shift larger elements one slot right to open a gap for `key`.
            while j >= 0 and key < a[j]:
                a[j + 1] = a[j]
                j -= 1
            a[j + 1] = key
        return a

    def timsort(self, a, block = 32):
        """Sort ``a`` in place with a simplified timsort and return it.

        Runs of ``block`` elements are insertion-sorted, then merged pairwise
        with doubling run width until a single run remains.

        Raises:
            ValueError: if ``block`` is smaller than 1.
        """
        if block < 1:
            raise ValueError("block must be at least 1")

        def insertionsort(array, left, right):
            # Insertion sort restricted to array[left..right] (inclusive).
            for i in range(left + 1, right + 1):
                temp = array[i]
                j = i - 1
                while j >= left and array[j] > temp:
                    array[j + 1] = array[j]
                    j -= 1
                array[j + 1] = temp

        def merge(array, l, m, r):
            # Merge the sorted runs array[l..m] and array[m+1..r] (inclusive).
            left = list(array[l:m + 1])
            right = list(array[m + 1:r + 1])
            i, j, k = 0, 0, l
            while i < len(left) and j < len(right):
                if left[i] <= right[j]:
                    array[k] = left[i]
                    i += 1
                else:
                    array[k] = right[j]
                    j += 1
                k += 1
            while i < len(left):
                array[k] = left[i]
                k += 1
                i += 1
            while j < len(right):
                array[k] = right[j]
                k += 1
                j += 1

        n = len(a)
        # The run length follows `block`, so runs match the merge width.
        for start in range(0, n, block):
            insertionsort(a, start, min(start + block - 1, n - 1))
        size = block
        while size < n:
            for left in range(0, n, 2 * size):
                mid = left + size - 1
                if mid >= n - 1:
                    # Only a (possibly short) left run remains: nothing to merge.
                    break
                right = min(left + 2 * size - 1, n - 1)
                merge(a, left, mid, right)
            size *= 2
        return a

    def selectionsort(self, a):
        """Sort ``a`` in place with selection sort and return it."""
        n = len(a)
        for i in range(n):
            min_idx = i
            for j in range(i + 1, n):
                if a[min_idx] > a[j]:
                    min_idx = j
            a[i], a[min_idx] = a[min_idx], a[i]
        return a

    def shellsort(self, a):
        """Sort ``a`` in place with shell sort (gap halving) and return it."""
        n = len(a)
        gap = n // 2
        while gap > 0:
            # Gapped insertion sort for the current gap.
            for i in range(gap, n):
                temp = a[i]
                j = i
                while j >= gap and a[j - gap] > temp:
                    a[j] = a[j - gap]
                    j -= gap
                a[j] = temp
            gap //= 2
        return a

    def bubblesort(self, a):
        """Sort ``a`` in place with bubble sort and return it.

        Implemented with loops (no recursion), so the number of swaps needed
        has no bearing on the call-stack depth.
        """
        for last in range(len(a) - 1, 0, -1):
            swapped = False
            for i in range(last):
                if a[i + 1] < a[i]:
                    a[i], a[i + 1] = a[i + 1], a[i]
                    swapped = True
            if not swapped:
                # A full pass without swaps means the data is ordered.
                break
        return a

    def cyclesort(self, a):
        """Sort ``a`` in place with cycle sort and return it."""
        def find_slot(array, item, cycle_start):
            # Final position of `item`: cycle_start plus the number of
            # smaller elements to its right.
            pos = cycle_start
            for i in range(cycle_start + 1, len(array)):
                if array[i] < item:
                    pos += 1
            return pos

        for cycle_start in range(0, len(a) - 1):
            item = a[cycle_start]
            pos = find_slot(a, item, cycle_start)
            if pos == cycle_start:
                continue
            # Step past duplicates of `item` already sitting in place.
            while item == a[pos]:
                pos += 1
            a[pos], item = item, a[pos]
            # Rotate the rest of the cycle until it closes at cycle_start.
            while pos != cycle_start:
                pos = find_slot(a, item, cycle_start)
                while item == a[pos]:
                    pos += 1
                a[pos], item = item, a[pos]
        return a

    def cocktailsort(self, a):
        """Sort ``a`` in place with cocktail shaker sort and return it."""
        start = 0
        end = len(a) - 1
        swapped = True
        while swapped:
            swapped = False
            # Forward pass bubbles the largest remaining value to `end`.
            for i in range(start, end):
                if a[i] > a[i + 1]:
                    a[i], a[i + 1] = a[i + 1], a[i]
                    swapped = True
            if not swapped:
                break
            swapped = False
            end -= 1
            # Backward pass bubbles the smallest remaining value to `start`.
            for i in range(end - 1, start - 1, -1):
                if a[i] > a[i + 1]:
                    a[i], a[i + 1] = a[i + 1], a[i]
                    swapped = True
            start += 1
        return a

View File

@ -5,6 +5,8 @@ def test_():
test_data_linear = [1, 3, 6, 7, 9] test_data_linear = [1, 3, 6, 7, 9]
y_data_ccu = [1, 3, 7, 14, 21] y_data_ccu = [1, 3, 7, 14, 21]
y_data_ccd = [1, 5, 7, 8.5, 8.66] y_data_ccd = [1, 5, 7, 8.5, 8.66]
test_data_scrambled = [-32, 34, 19, 72, -65, -11, -43, 6, 85, -17, -98, -26, 12, 20, 9, -92, -40, 98, -78, 17, -20, 49, 93, -27, -24, -66, 40, 84, 1, -64, -68, -25, -42, -46, -76, 43, -3, 30, -14, -34, -55, -13, 41, -30, 0, -61, 48, 23, 60, 87, 80, 77, 53, 73, 79, 24, -52, 82, 8, -44, 65, 47, -77, 94, 7, 37, -79, 36, -94, 91, 59, 10, 97, -38, -67, 83, 54, 31, -95, -63, 16, -45, 21, -12, 66, -48, -18, -96, -90, -21, -83, -74, 39, 64, 69, -97, 13, 55, 27, -39]
test_data_sorted = [-98, -97, -96, -95, -94, -92, -90, -83, -79, -78, -77, -76, -74, -68, -67, -66, -65, -64, -63, -61, -55, -52, -48, -46, -45, -44, -43, -42, -40, -39, -38, -34, -32, -30, -27, -26, -25, -24, -21, -20, -18, -17, -14, -13, -12, -11, -3, 0, 1, 6, 7, 8, 9, 10, 12, 13, 16, 17, 19, 20, 21, 23, 24, 27, 30, 31, 34, 36, 37, 39, 40, 41, 43, 47, 48, 49, 53, 54, 55, 59, 60, 64, 65, 66, 69, 72, 73, 77, 79, 80, 82, 83, 84, 85, 87, 91, 93, 94, 97, 98]
assert an.basic_stats(test_data_linear) == {"mean": 5.2, "median": 6.0, "standard-deviation": 2.85657137141714, "variance": 8.16, "minimum": 1.0, "maximum": 9.0} assert an.basic_stats(test_data_linear) == {"mean": 5.2, "median": 6.0, "standard-deviation": 2.85657137141714, "variance": 8.16, "minimum": 1.0, "maximum": 9.0}
assert an.z_score(3.2, 6, 1.5) == -1.8666666666666665 assert an.z_score(3.2, 6, 1.5) == -1.8666666666666665
assert an.z_normalize([test_data_linear], 1).tolist() == [[0.07537783614444091, 0.22613350843332272, 0.45226701686664544, 0.5276448530110863, 0.6784005252999682]] assert an.z_normalize([test_data_linear], 1).tolist() == [[0.07537783614444091, 0.22613350843332272, 0.45226701686664544, 0.5276448530110863, 0.6784005252999682]]
@ -15,4 +17,15 @@ def test_():
#assert all(isinstance(item, str) for item in an.regression(test_data_linear, y_data_ccd, ["sig"])) == True #assert all(isinstance(item, str) for item in an.regression(test_data_linear, y_data_ccd, ["sig"])) == True
assert an.Metric().elo(1500, 1500, [1, 0], 400, 24) == 1512.0 assert an.Metric().elo(1500, 1500, [1, 0], 400, 24) == 1512.0
assert an.Metric().glicko2(1500, 250, 0.06, [1500, 1400], [250, 240], [1, 0]) == (1478.864307445517, 195.99122679202452, 0.05999602937563585) assert an.Metric().glicko2(1500, 250, 0.06, [1500, 1400], [250, 240], [1, 0]) == (1478.864307445517, 195.99122679202452, 0.05999602937563585)
#assert an.Metric().trueskill([[(25, 8.33), (24, 8.25), (32, 7.5)], [(25, 8.33), (25, 8.33), (21, 6.5)]], [1, 0]) == [(metrics.trueskill.Rating(mu=21.346, sigma=7.875), metrics.trueskill.Rating(mu=20.415, sigma=7.808), metrics.trueskill.Rating(mu=29.037, sigma=7.170)), (metrics.trueskill.Rating(mu=28.654, sigma=7.875), metrics.trueskill.Rating(mu=28.654, sigma=7.875), metrics.trueskill.Rating(mu=23.225, sigma=6.287))] #assert an.Metric().trueskill([[(25, 8.33), (24, 8.25), (32, 7.5)], [(25, 8.33), (25, 8.33), (21, 6.5)]], [1, 0]) == [(metrics.trueskill.Rating(mu=21.346, sigma=7.875), metrics.trueskill.Rating(mu=20.415, sigma=7.808), metrics.trueskill.Rating(mu=29.037, sigma=7.170)), (metrics.trueskill.Rating(mu=28.654, sigma=7.875), metrics.trueskill.Rating(mu=28.654, sigma=7.875), metrics.trueskill.Rating(mu=23.225, sigma=6.287))]
assert all(a == b for a, b in zip(an.Sort().quicksort(test_data_scrambled), test_data_sorted))
assert all(a == b for a, b in zip(an.Sort().mergesort(test_data_scrambled), test_data_sorted))
assert all(a == b for a, b in zip(an.Sort().introsort(test_data_scrambled), test_data_sorted))
assert all(a == b for a, b in zip(an.Sort().heapsort(test_data_scrambled), test_data_sorted))
assert all(a == b for a, b in zip(an.Sort().insertionsort(test_data_scrambled), test_data_sorted))
assert all(a == b for a, b in zip(an.Sort().timsort(test_data_scrambled), test_data_sorted))
assert all(a == b for a, b in zip(an.Sort().selectionsort(test_data_scrambled), test_data_sorted))
assert all(a == b for a, b in zip(an.Sort().shellsort(test_data_scrambled), test_data_sorted))
assert all(a == b for a, b in zip(an.Sort().bubblesort(test_data_scrambled), test_data_sorted))
assert all(a == b for a, b in zip(an.Sort().cyclesort(test_data_scrambled), test_data_sorted))
assert all(a == b for a, b in zip(an.Sort().cocktailsort(test_data_scrambled), test_data_sorted))

55
data-analysis/test.py Normal file
View File

@ -0,0 +1,55 @@
import threading
from multiprocessing import Process, Queue
import time
from os import system
class testcls():
    """Experiment that chains short-lived worker processes.

    ``main`` launches two processes; each one bumps a counter, sleeps for a
    second, and launches a successor until its counter reaches the limit.

    NOTE(review): every Process presumably works on its own copy of this
    object, so the launching process would not see the updated counters —
    confirm against the multiprocessing documentation.
    """
    # Counter incremented by task1 (stops respawning once it reaches 10).
    i = 0
    # Counter decremented by task2 (stops respawning once it reaches -10).
    j = 0
    # Enable flags; not read by any method of this class.
    t1_en = True
    t2_en = True

    def main(self):
        """Create both worker processes, then start them in order."""
        workers = [
            Process(name = "task1", target = self.task1),
            Process(name = "task2", target = self.task2),
        ]
        for worker in workers:
            worker.start()
        #print(self.i)
        #print(self.j)

    def task1(self):
        """Increment ``i``, wait one second, and respawn while ``i`` < 10."""
        self.i += 1
        time.sleep(1)
        if not (self.i < 10):
            return
        successor = Process(name = "task1", target = self.task1)
        successor.start()

    def task2(self):
        """Decrement ``j``, wait one second, and respawn while ``j`` > -10."""
        self.j -= 1
        time.sleep(1)
        if not (self.j > -10):
            return
        successor = Process(name = "task2", target = self.task2)
        successor.start()
"""
if __name__ == "__main__":
tmain = threading.Thread(name = "main", target = main)
tmain.start()
t = 0
while(True):
system("clear")
for thread in threading.enumerate():
if thread.getName() != "MainThread":
print(thread.getName())
print(str(len(threading.enumerate())))
print(i)
print(j)
time.sleep(0.1)
t += 1
if(t == 100):
t1_en = False
t2_en = False
"""