submission

This commit is contained in:
ltcptgeneral 2023-11-19 09:47:44 -08:00
parent f9e4c03b85
commit 8a5166867f
2 changed files with 35 additions and 33 deletions

View File

@ -87,7 +87,15 @@ class BPRbatch(tf.keras.Model):
# Prediction for a single instance
def predict(self, u, i):
p = self.betaI[i] + tf.tensordot(self.gammaU[u], self.gammaI[i], 1)
bi = self.bi
gu = self.gu
gi = self.gi
if u != None:
gu = self.gammaU[u]
if i != None:
bi = self.betaI[i]
gi = self.gammaI[i]
p = bi + tf.tensordot(gu, gi, 1)
return p
# Regularizer
@ -109,6 +117,11 @@ class BPRbatch(tf.keras.Model):
x_ui = self.score(sampleU, sampleI)
x_uj = self.score(sampleU, sampleJ)
return -tf.reduce_mean(tf.math.log(tf.math.sigmoid(x_ui - x_uj)))
def finalize(self):
self.bi = np.average(self.betaI, axis=0)
self.gu = np.average(self.gammaU, axis=0)
self.gi = np.average(self.gammaI, axis=0)
# %% [markdown]
# ### Play Predictor
@ -119,9 +132,7 @@ class PlayPredictor:
def __init__(self):
pass
def fit(self, data, threshold=0.6, K=5, iters=100): # data is an array of (user, game, review) tuples
self.topGames = self.getTopGames(threshold)
def fit(self, data, K=5, iters=100): # data is an array of (user, game, review) tuples
self.userIDs = {}
self.itemIDs = {}
interactions = []
@ -166,47 +177,33 @@ class PlayPredictor:
for i in range(iters):
obj = trainingStepBPR(self.modelBPR, interactions)
if (i % 10 == 9): print("iteration " + str(i+1) + ", objective = " + str(obj))
self.modelBPR.finalize()
def predict(self, user, game, threshold=0.5):
if user in self.userIDs and game in self.itemIDs:
pred = self.modelBPR.predict(self.userIDs[user], self.itemIDs[game]).numpy()
return int(pred > threshold)
else:
return int(game in self.topGames)
def getTopGames (self, threshold):
gameCount = defaultdict(int)
totalPlayed = 0
for user,game,_ in readJSON("train.json.gz"):
gameCount[game] += 1
totalPlayed += 1
mostPopular = [(gameCount[x], x) for x in gameCount]
mostPopular.sort()
mostPopular.reverse()
return1 = set()
count = 0
for ic, i in mostPopular:
count += ic
return1.add(i)
if count > totalPlayed * threshold: break
return return1
uid = None
gid = None
if user in self.userIDs:
uid = self.userIDs[user]
if game in self.itemIDs:
gid = self.itemIDs[game]
pred = self.modelBPR.predict(uid, gid).numpy()
return int(pred > threshold)
# %%
model = PlayPredictor()
model.fit(train, K=6, iters=200)
error = 0
# %%
CM = np.array([[0,0], [0,0]])
balanced_valid = get_balanced_set(dataset, valid)
for user, game, review in balanced_valid:
pred = model.predict(user, game, threshold=0.5)
if pred != review["played"]:
error += 1
CM[review["played"]][pred] += 1
print(f"PlayPredictor accuracy: ", 1 - error / len(balanced_valid))
print(CM)
print(f"PlayPredictor accuracy: ", 1 - (CM[1][0] + CM[0][1]) / len(balanced_valid))
# %%
writePredictions("pairs_Played.csv", "predictions_Played.csv", model)

View File

@ -0,0 +1,5 @@
For all models, I used the default train/test split ussed in hw3.
For Play Prediction I found that the Bayseian Personalized Ranking produced a decent error rate. To adapt the ranking to create binary predictions, I use a threshold hyperparameter, and compared the model's prediction vlaue to.
For Time Prediction, I could not find a model that significantly beat the baseline. However, through tuning hyperparameters, I was able to decrease the MSE by a small margin.