mirror of
https://github.com/ltcptgeneral/cse151b-final-project.git
synced 2024-11-09 22:54:45 +00:00
38 lines
1.2 KiB
Python
38 lines
1.2 KiB
Python
import string
|
|
|
|
import numpy as np
|
|
|
|
# Load the word list: one entry per line of words.txt, with surrounding
# whitespace (including the trailing newline) stripped from each line.
words = []
with open('words.txt', 'r') as f:
    words.extend(line.strip() for line in f)
|
|
|
|
# Tally how often each lowercase letter occurs at each of the five positions.
# letter_freqs[p][c] is the number of words whose character at index p is c.
# A word longer than five characters, or one containing a character outside
# a-z, raises here instead of being silently skipped.
letter_freqs = [dict.fromkeys(string.ascii_lowercase, 0) for _ in range(5)]
for entry in words:
    for position, letter in enumerate(entry):
        letter_freqs[position][letter] += 1
|
|
|
|
# Score every letter at every position as its count divided by the count of
# the most frequent letter at that same position, so scores are relative to
# the per-position maximum rather than to the total number of words.
letter_scores = []
for position_counts in letter_freqs:
    peak = np.max(list(position_counts.values()))
    letter_scores.append(
        {letter: position_counts[letter] / peak for letter in string.ascii_lowercase}
    )
|
|
|
|
# Build (score, word) pairs, where a word's score is the sum of the
# position-specific scores of its letters. The pairs are in input order here;
# ranking happens afterwards.
word_scores = [
    (sum(letter_scores[position][letter] for position, letter in enumerate(entry)), entry)
    for entry in words
]
|
|
|
|
# Rank the words from highest to lowest score. Despite the variable's name,
# the ordering key is the first tuple element (the score). The sort is
# ascending and stable, then flipped, so equal-scoring words end up in the
# reverse of their input order.
sorted_by_second = sorted(word_scores, key=lambda pair: pair[0])
sorted_by_second.reverse()
print(sorted_by_second[:10])

# Report the position of the word 'soare' in the ranking (every occurrence,
# if the list contains it more than once).
for rank, pair in enumerate(sorted_by_second):
    total, candidate = pair
    if candidate != 'soare':
        continue
    print(f'{candidate} with a score of {total} is found at index {rank}')
|
|
|