submission
This commit is contained in:
parent
f9e4c03b85
commit
8a5166867f
@ -87,7 +87,15 @@ class BPRbatch(tf.keras.Model):
|
|||||||
|
|
||||||
# Prediction for a single instance
|
# Prediction for a single instance
|
||||||
def predict(self, u, i):
|
def predict(self, u, i):
|
||||||
p = self.betaI[i] + tf.tensordot(self.gammaU[u], self.gammaI[i], 1)
|
bi = self.bi
|
||||||
|
gu = self.gu
|
||||||
|
gi = self.gi
|
||||||
|
if u != None:
|
||||||
|
gu = self.gammaU[u]
|
||||||
|
if i != None:
|
||||||
|
bi = self.betaI[i]
|
||||||
|
gi = self.gammaI[i]
|
||||||
|
p = bi + tf.tensordot(gu, gi, 1)
|
||||||
return p
|
return p
|
||||||
|
|
||||||
# Regularizer
|
# Regularizer
|
||||||
@ -109,6 +117,11 @@ class BPRbatch(tf.keras.Model):
|
|||||||
x_ui = self.score(sampleU, sampleI)
|
x_ui = self.score(sampleU, sampleI)
|
||||||
x_uj = self.score(sampleU, sampleJ)
|
x_uj = self.score(sampleU, sampleJ)
|
||||||
return -tf.reduce_mean(tf.math.log(tf.math.sigmoid(x_ui - x_uj)))
|
return -tf.reduce_mean(tf.math.log(tf.math.sigmoid(x_ui - x_uj)))
|
||||||
|
|
||||||
|
def finalize(self):
|
||||||
|
self.bi = np.average(self.betaI, axis=0)
|
||||||
|
self.gu = np.average(self.gammaU, axis=0)
|
||||||
|
self.gi = np.average(self.gammaI, axis=0)
|
||||||
|
|
||||||
# %% [markdown]
|
# %% [markdown]
|
||||||
# ### Play Predictor
|
# ### Play Predictor
|
||||||
@ -119,9 +132,7 @@ class PlayPredictor:
|
|||||||
def __init__(self):
|
def __init__(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def fit(self, data, threshold=0.6, K=5, iters=100): # data is an array of (user, game, review) tuples
|
def fit(self, data, K=5, iters=100): # data is an array of (user, game, review) tuples
|
||||||
self.topGames = self.getTopGames(threshold)
|
|
||||||
|
|
||||||
self.userIDs = {}
|
self.userIDs = {}
|
||||||
self.itemIDs = {}
|
self.itemIDs = {}
|
||||||
interactions = []
|
interactions = []
|
||||||
@ -166,47 +177,33 @@ class PlayPredictor:
|
|||||||
for i in range(iters):
|
for i in range(iters):
|
||||||
obj = trainingStepBPR(self.modelBPR, interactions)
|
obj = trainingStepBPR(self.modelBPR, interactions)
|
||||||
if (i % 10 == 9): print("iteration " + str(i+1) + ", objective = " + str(obj))
|
if (i % 10 == 9): print("iteration " + str(i+1) + ", objective = " + str(obj))
|
||||||
|
|
||||||
|
self.modelBPR.finalize()
|
||||||
|
|
||||||
def predict(self, user, game, threshold=0.5):
|
def predict(self, user, game, threshold=0.5):
|
||||||
if user in self.userIDs and game in self.itemIDs:
|
uid = None
|
||||||
pred = self.modelBPR.predict(self.userIDs[user], self.itemIDs[game]).numpy()
|
gid = None
|
||||||
return int(pred > threshold)
|
if user in self.userIDs:
|
||||||
else:
|
uid = self.userIDs[user]
|
||||||
return int(game in self.topGames)
|
if game in self.itemIDs:
|
||||||
|
gid = self.itemIDs[game]
|
||||||
def getTopGames (self, threshold):
|
pred = self.modelBPR.predict(uid, gid).numpy()
|
||||||
gameCount = defaultdict(int)
|
return int(pred > threshold)
|
||||||
totalPlayed = 0
|
|
||||||
|
|
||||||
for user,game,_ in readJSON("train.json.gz"):
|
|
||||||
gameCount[game] += 1
|
|
||||||
totalPlayed += 1
|
|
||||||
|
|
||||||
mostPopular = [(gameCount[x], x) for x in gameCount]
|
|
||||||
mostPopular.sort()
|
|
||||||
mostPopular.reverse()
|
|
||||||
|
|
||||||
return1 = set()
|
|
||||||
count = 0
|
|
||||||
for ic, i in mostPopular:
|
|
||||||
count += ic
|
|
||||||
return1.add(i)
|
|
||||||
if count > totalPlayed * threshold: break
|
|
||||||
return return1
|
|
||||||
|
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
model = PlayPredictor()
|
model = PlayPredictor()
|
||||||
model.fit(train, K=6, iters=200)
|
model.fit(train, K=6, iters=200)
|
||||||
|
|
||||||
error = 0
|
# %%
|
||||||
|
CM = np.array([[0,0], [0,0]])
|
||||||
balanced_valid = get_balanced_set(dataset, valid)
|
balanced_valid = get_balanced_set(dataset, valid)
|
||||||
for user, game, review in balanced_valid:
|
for user, game, review in balanced_valid:
|
||||||
pred = model.predict(user, game, threshold=0.5)
|
pred = model.predict(user, game, threshold=0.5)
|
||||||
if pred != review["played"]:
|
CM[review["played"]][pred] += 1
|
||||||
error += 1
|
|
||||||
|
|
||||||
print(f"PlayPredictor accuracy: ", 1 - error / len(balanced_valid))
|
print(CM)
|
||||||
|
print(f"PlayPredictor accuracy: ", 1 - (CM[1][0] + CM[0][1]) / len(balanced_valid))
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
writePredictions("pairs_Played.csv", "predictions_Played.csv", model)
|
writePredictions("pairs_Played.csv", "predictions_Played.csv", model)
|
||||||
|
@ -0,0 +1,5 @@
|
|||||||
|
For all models, I used the default train/test split used in hw3.
|
||||||
|
|
||||||
|
For Play Prediction I found that Bayesian Personalized Ranking produced a decent error rate. To adapt the ranking to create binary predictions, I use a threshold hyperparameter and compare the model's prediction value against it.
|
||||||
|
|
||||||
|
For Time Prediction, I could not find a model that significantly beat the baseline. However, through tuning hyperparameters, I was able to decrease the MSE by a small margin.
|
Reference in New Issue
Block a user