1
0
mirror of https://github.com/ltcptgeneral/cs239-caching.git synced 2025-04-01 12:33:25 +00:00

prefetch test

This commit is contained in:
HiccupHan 2025-02-25 13:02:48 -08:00
parent a276151e0c
commit e9b1128826
5 changed files with 93 additions and 12 deletions

@ -1,5 +1,5 @@
from .cache import Cache
from database import get_user_profile
from database import get_user_profile, get_friends
from collections import OrderedDict
import math
@ -39,15 +39,12 @@ class PrefetchCache(Cache):
def prefetch(self, profile) -> bool:
    """Prefetch profiles of a sample of this user's friends into the cache.

    Fetches up to ceil(limit * 0.1) friend profiles via get_friends and
    inserts each one, evicting the oldest entry (popitem(last=False),
    i.e. FIFO order on the OrderedDict) whenever the cache is full.

    Returns:
        bool: True if at least one entry was evicted to make room.
    """
    evict = False
    # Pull roughly 10% of the cache capacity worth of friend profiles.
    friends_prof = get_friends(profile["user_id"], math.ceil(self.limit * 0.1))
    for friend_id, friend_profile in friends_prof.items():
        if len(self.cache) >= self.limit:
            # Cache full: drop the oldest inserted entry before adding.
            self.cache.popitem(last=False)
            evict = True
        self.cache[friend_id] = friend_profile
    return evict
def invalidate(self, key: str) -> bool:

@ -1,4 +1,4 @@
cache_strategy: "Tiered" # Change this to "Prefetch" or "Tiered" or "Seive"
cache_strategy: "Prefetch" # Change this to "Prefetch" or "Tiered" or "Seive"
cache_limit: 10
l2_cache_limit: 100
db_file: "llmData_sns.json" # Change this to the name of any json file within the "database/datastore" folder

@ -1,6 +1,7 @@
from tinydb import TinyDB, Query
from config import DB_FILE
import random
DB_LOCATION = "database/datastore/" + DB_FILE
@ -11,6 +12,22 @@ User = Query()
def get_user_ids():
    """Return a list of every user_id stored in the database."""
    ids = []
    for record in db.all():
        ids.append(record["user_id"])
    return ids
def get_user_friend_ids():
    """Return a mapping of every user_id to that user's friends list."""
    return {record["user_id"]: record["friends"] for record in db.all()}
def get_friends(user_id, num_friends):
    """Return {friend_id: friend_profile} for a random sample of a user's friends.

    Samples up to num_friends friend ids (seeded for reproducibility across
    runs) and looks up each friend's own profile in the database.

    Returns:
        dict: friend_id -> friend profile record; {} if user_id is unknown.
    """
    curr_user = db.search(User.user_id == user_id)
    if not curr_user:
        return {}
    # Deterministic sample so repeated benchmark runs prefetch the same friends.
    random.seed(0)
    friend_ids = curr_user[0]["friends"]
    # Clamp: random.sample raises ValueError when the sample size exceeds
    # the population, e.g. a user with fewer friends than requested.
    sample_size = min(num_friends, len(friend_ids))
    friends = {}
    for f in random.sample(friend_ids, sample_size):
        # BUG FIX: query the friend's own profile (User.user_id == f); the
        # original queried user_id again, storing the current user's profile
        # under every friend key.
        match = db.search(User.user_id == f)
        if match:
            friends[f] = match[0]
    return friends
def get_user_profile(user_id):
"""Fetch user profile from TinyDB"""
result = db.search(User.user_id == user_id)

@ -1,5 +1,5 @@
from fastapi import FastAPI, HTTPException
from database import get_user_ids, get_user_profile, update_user_profile
from database import get_user_ids, get_user_profile, update_user_profile, get_user_friend_ids
from cache.cache import BaselineCache
from cache.prefetch_cache import PrefetchCache
from cache.tiered_cache import TieredCache
@ -25,6 +25,10 @@ else:
def fetch_user_ids():
    """Endpoint handler: list every user id in the datastore."""
    all_ids = get_user_ids()
    return {"ids": all_ids}
@app.get("/users_and_friends")
def fetch_user_and_friends():
    """Endpoint handler: map each user id to that user's friends list."""
    mapping = get_user_friend_ids()
    return mapping
@app.get("/user/{user_id}")
def fetch_user_profile(user_id: str):
"""Fetch user profile with caching"""

@ -0,0 +1,63 @@
# Tests latency and hit rate of endpoints. Can be configured with weighted averages for various endpoints.
import requests
import random
import json
from tqdm import tqdm
import time
baseurl = "http://localhost:8000"
chance_of_selecting_friend = 0.7
user_friends = json.loads(requests.get(baseurl + "/users_and_friends").content)
user_ids = json.loads(requests.get(baseurl + "/users").content)["ids"]
random.seed(0)
def fetch_friend(prob):
    """Bernoulli trial: return True with probability prob."""
    draw = random.random()
    return draw < prob
def generate_random():
    """Pick a uniformly random user id (as a string) from the global user_ids."""
    return str(random.choice(user_ids))
def generate_random_friend(user):
    """Pick a random friend id (as a string) of `user` from the global user_friends map."""
    friend_pool = user_friends[user]
    chosen = random.choice(friend_pool)
    return str(chosen)
# Drive 10,000 profile requests against the running server, following a friend
# link with probability `chance_of_selecting_friend`, then report latency and
# cache hit-rate statistics.
times = []
hits = []
start = time.time()
curr_user = generate_random()
for _ in tqdm(range(10000)):
    url = baseurl + "/user/" + curr_user
    response = requests.get(url)
    content = json.loads(response.content)
    times.append(content["time_ms"])
    hits.append(content["source"] == "cache")
    # Random walk: usually hop to a friend of the current user, otherwise
    # jump to a uniformly random user.
    if fetch_friend(chance_of_selecting_friend):
        curr_user = generate_random_friend(curr_user)
    else:
        curr_user = generate_random()
end = time.time()
hits_count = sum(hits)
miss_count = len(hits) - hits_count
# Partition the server-reported latencies by cache hit vs. miss.
hits_time = sum(t for t, h in zip(times, hits) if h)
miss_time = sum(t for t, h in zip(times, hits) if not h)
total_time = hits_time + miss_time
print(f"hits: {hits_count} misses: {miss_count} ratio: { hits_count / (hits_count + miss_count)}")
print(f"average response time (ms) : {total_time / len(times)}")
# BUG FIX: an all-hit or all-miss run made the original divide by zero here.
if hits_count:
    print(f"average cache hit response time (ms) : {hits_time / hits_count}")
if miss_count:
    print(f"average cache miss response time (ms): {miss_time / miss_count}")
print(f"cache throughput (requests / ms) : { len(times) / total_time}")
print(f"real throughput (requests / ms) : { len(times) / (end - start) / 1000}")