1
0
mirror of https://github.com/ltcptgeneral/cs239-caching.git synced 2025-04-01 12:33:25 +00:00

prefetch test

This commit is contained in:
HiccupHan 2025-02-25 13:02:48 -08:00
parent a276151e0c
commit e9b1128826
5 changed files with 93 additions and 12 deletions

@ -1,5 +1,5 @@
from .cache import Cache
from database import get_user_profile
from database import get_user_profile, get_friends
from collections import OrderedDict
import math
@ -39,15 +39,12 @@ class PrefetchCache(Cache):
def prefetch(self, profile) -> bool:
    """Prefetch profiles of a sample of this user's friends into the cache.

    Fetches up to ceil(limit * 0.1) friend profiles via get_friends and
    inserts each one, evicting the oldest entry (popitem(last=False),
    i.e. FIFO order on the OrderedDict) whenever the cache is full.

    Returns:
        bool: True if at least one entry was evicted to make room.
    """
    evict = False
    # Pull roughly 10% of the cache capacity worth of friend profiles.
    friends_prof = get_friends(profile["user_id"], math.ceil(self.limit * 0.1))
    for friend_id, friend_profile in friends_prof.items():
        if len(self.cache) >= self.limit:
            # Cache full: drop the oldest inserted entry before adding.
            self.cache.popitem(last=False)
            evict = True
        self.cache[friend_id] = friend_profile
    return evict
def invalidate(self, key: str) -> bool:

@ -1,4 +1,4 @@
cache_strategy: "Tiered" # Change this to "Prefetch" or "Tiered" or "Seive"
cache_strategy: "Prefetch" # Change this to "Prefetch" or "Tiered" or "Seive"
cache_limit: 10
l2_cache_limit: 100
db_file: "llmData_sns.json" # Change this to the name of any json file within the "database/datastore" folder

@ -1,6 +1,7 @@
from tinydb import TinyDB, Query
from config import DB_FILE
import random
DB_LOCATION = "database/datastore/" + DB_FILE
@ -11,6 +12,22 @@ User = Query()
def get_user_ids():
    """Return a list of every user_id stored in the database."""
    ids = []
    for record in db.all():
        ids.append(record["user_id"])
    return ids
def get_user_friend_ids():
    """Return a mapping of every user_id to that user's friends list."""
    return {record["user_id"]: record["friends"] for record in db.all()}
def get_friends(user_id, num_friends):
    """Return {friend_id: friend_profile} for a random sample of a user's friends.

    Samples up to num_friends friend ids (seeded for reproducibility across
    runs) and looks up each friend's own profile in the database.

    Returns:
        dict: friend_id -> friend profile record; {} if user_id is unknown.
    """
    curr_user = db.search(User.user_id == user_id)
    if not curr_user:
        return {}
    # Deterministic sample so repeated benchmark runs prefetch the same friends.
    random.seed(0)
    friend_ids = curr_user[0]["friends"]
    # Clamp: random.sample raises ValueError when the sample size exceeds
    # the population, e.g. a user with fewer friends than requested.
    sample_size = min(num_friends, len(friend_ids))
    friends = {}
    for f in random.sample(friend_ids, sample_size):
        # BUG FIX: query the friend's own profile (User.user_id == f); the
        # original queried user_id again, storing the current user's profile
        # under every friend key.
        match = db.search(User.user_id == f)
        if match:
            friends[f] = match[0]
    return friends
def get_user_profile(user_id):
"""Fetch user profile from TinyDB"""
result = db.search(User.user_id == user_id)

@ -1,5 +1,5 @@
from fastapi import FastAPI, HTTPException
from database import get_user_ids, get_user_profile, update_user_profile
from database import get_user_ids, get_user_profile, update_user_profile, get_user_friend_ids
from cache.cache import BaselineCache
from cache.prefetch_cache import PrefetchCache
from cache.tiered_cache import TieredCache
@ -25,6 +25,10 @@ else:
def fetch_user_ids():
    """Endpoint handler: list every user id in the datastore."""
    all_ids = get_user_ids()
    return {"ids": all_ids}
@app.get("/users_and_friends")
def fetch_user_and_friends():
    """Endpoint handler: map each user id to that user's friends list."""
    mapping = get_user_friend_ids()
    return mapping
@app.get("/user/{user_id}")
def fetch_user_profile(user_id: str):
"""Fetch user profile with caching"""

@ -0,0 +1,63 @@
# Tests latency and hit rate of endpoints. Can be configured with weighted averages for various endpoints.
import requests
import random
import json
from tqdm import tqdm
import time
baseurl = "http://localhost:8000"
chance_of_selecting_friend = 0.7
user_friends = json.loads(requests.get(baseurl + "/users_and_friends").content)
user_ids = json.loads(requests.get(baseurl + "/users").content)["ids"]
random.seed(0)
def fetch_friend(prob):
    """Bernoulli trial: return True with probability prob."""
    draw = random.random()
    return draw < prob
def generate_random():
    """Pick a uniformly random user id (as a string) from the global user_ids."""
    return str(random.choice(user_ids))
def generate_random_friend(user):
    """Pick a random friend id (as a string) of `user` from the global user_friends map."""
    friend_pool = user_friends[user]
    chosen = random.choice(friend_pool)
    return str(chosen)
# Drive 10,000 profile requests against the running server, following a friend
# link with probability `chance_of_selecting_friend`, then report latency and
# cache hit-rate statistics.
times = []
hits = []
start = time.time()
curr_user = generate_random()
for _ in tqdm(range(10000)):
    url = baseurl + "/user/" + curr_user
    response = requests.get(url)
    content = json.loads(response.content)
    times.append(content["time_ms"])
    hits.append(content["source"] == "cache")
    # Random walk: usually hop to a friend of the current user, otherwise
    # jump to a uniformly random user.
    if fetch_friend(chance_of_selecting_friend):
        curr_user = generate_random_friend(curr_user)
    else:
        curr_user = generate_random()
end = time.time()
hits_count = sum(hits)
miss_count = len(hits) - hits_count
# Partition the server-reported latencies by cache hit vs. miss.
hits_time = sum(t for t, h in zip(times, hits) if h)
miss_time = sum(t for t, h in zip(times, hits) if not h)
total_time = hits_time + miss_time
print(f"hits: {hits_count} misses: {miss_count} ratio: { hits_count / (hits_count + miss_count)}")
print(f"average response time (ms) : {total_time / len(times)}")
# BUG FIX: an all-hit or all-miss run made the original divide by zero here.
if hits_count:
    print(f"average cache hit response time (ms) : {hits_time / hits_count}")
if miss_count:
    print(f"average cache miss response time (ms): {miss_time / miss_count}")
print(f"cache throughput (requests / ms) : { len(times) / total_time}")
print(f"real throughput (requests / ms) : { len(times) / (end - start) / 1000}")