submission

2023-11-19 09:47:44 -08:00 · 2023-11-19 09:47:44 -08:00 · 8a5166867f
commit 8a5166867f
parent f9e4c03b85
2 changed files with 35 additions and 33 deletions
--- a/assignment1.py
+++ b/assignment1.py
@ -87,7 +87,15 @@ class BPRbatch(tf.keras.Model):
    # Prediction for a single instance
    def predict(self, u, i):
-        p = self.betaI[i] + tf.tensordot(self.gammaU[u], self.gammaI[i], 1)
+        bi = self.bi
        gu = self.gu
        gi = self.gi
        if u != None:
            gu = self.gammaU[u]
        if i != None:
            bi = self.betaI[i]
            gi = self.gammaI[i]
        p = bi + tf.tensordot(gu, gi, 1)
        return p
    # Regularizer
@ -110,6 +118,11 @@ class BPRbatch(tf.keras.Model):
        x_uj = self.score(sampleU, sampleJ)
        return -tf.reduce_mean(tf.math.log(tf.math.sigmoid(x_ui - x_uj)))
    def finalize(self):
        self.bi = np.average(self.betaI, axis=0)
        self.gu = np.average(self.gammaU, axis=0)
        self.gi = np.average(self.gammaI, axis=0)
 # %% [markdown]
 # ### Play Predictor
@ -119,9 +132,7 @@ class PlayPredictor:
    def __init__(self):
        pass
-    def fit(self, data, threshold=0.6, K=5, iters=100): # data is an array of (user, game, review) tuples
+    def fit(self, data, K=5, iters=100): # data is an array of (user, game, review) tuples
        self.topGames = self.getTopGames(threshold)
        self.userIDs = {}
        self.itemIDs = {}
        interactions = []
@ -167,46 +178,32 @@ class PlayPredictor:
            obj = trainingStepBPR(self.modelBPR, interactions)
            if (i % 10 == 9): print("iteration " + str(i+1) + ", objective = " + str(obj))
        self.modelBPR.finalize()
    def predict(self, user, game, threshold=0.5):
-        if user in self.userIDs and game in self.itemIDs:
+        uid = None
-            pred = self.modelBPR.predict(self.userIDs[user], self.itemIDs[game]).numpy()
+        gid = None
-            return int(pred > threshold)
+        if user in self.userIDs:
-        else:
+            uid = self.userIDs[user]
-            return int(game in self.topGames)
+        if game in self.itemIDs:
-
+            gid = self.itemIDs[game]
-    def getTopGames (self, threshold):
+        pred = self.modelBPR.predict(uid, gid).numpy()
-        gameCount = defaultdict(int)
+        return int(pred > threshold)
        totalPlayed = 0
        for user,game,_ in readJSON("train.json.gz"):
            gameCount[game] += 1
            totalPlayed += 1
        mostPopular = [(gameCount[x], x) for x in gameCount]
        mostPopular.sort()
        mostPopular.reverse()
        return1 = set()
        count = 0
        for ic, i in mostPopular:
            count += ic
            return1.add(i)
            if count > totalPlayed * threshold: break
        return return1
 # %%
 model = PlayPredictor()
 model.fit(train, K=6, iters=200)
-error = 0
+# %%
 CM = np.array([[0,0], [0,0]])
 balanced_valid = get_balanced_set(dataset, valid)
 for user, game, review in balanced_valid:
    pred = model.predict(user, game, threshold=0.5)
-    if pred != review["played"]:
+    CM[review["played"]][pred] += 1
        error += 1
-print(f"PlayPredictor accuracy: ", 1 - error / len(balanced_valid))
+print(CM)
 print(f"PlayPredictor accuracy: ", 1 - (CM[1][0] + CM[0][1]) / len(balanced_valid))
 # %%
 writePredictions("pairs_Played.csv", "predictions_Played.csv", model)
--- a/writeup.txt
+++ b/writeup.txt
@ -0,0 +1,5 @@
 For all models, I used the default train/test split used in hw3.
 For Play Prediction, I found that Bayesian Personalized Ranking produced a decent error rate. To adapt the ranking to produce binary predictions, I used a threshold hyperparameter and compared the model's prediction value to it.
 For Time Prediction, I could not find a model that significantly beat the baseline. However, through tuning hyperparameters, I was able to decrease the MSE by a small margin.