Compare commits

..

31 Commits

Author SHA1 Message Date
Arthur Lu
db002dbdd0 update results for baseline, ideal, nocache, prefetch, and tiered
add results for readafterwrite

remove frequent_after_write and friend_based from varied workload
2025-03-04 07:17:30 +00:00
Derek Wang
9cd8331a9d Added test specifically for read_after_write 2025-03-03 22:50:49 -08:00
Derek Wang
583c8633f7 Added test specifically for read_after_write 2025-03-03 22:48:35 -08:00
Arthur Lu
d2da61fa86 remove frequent_after_write from unified test script 2025-03-04 06:30:14 +00:00
Arthur Lu
4197ffb0ea INVALIDclear! initial results 2025-03-04 06:29:06 +00:00
root
474d827de6 Merge branch 'main' of https://github.com/ltcptgeneral/cs239-caching 2025-03-04 06:24:40 +00:00
Arthur Lu
839e1b8bd7 remove print statements from sieve, update gitignore, minor fix to test utils 2025-03-04 06:24:13 +00:00
HiccupHan
31ffbc18ec Merge branch 'main' of https://github.com/ltcptgeneral/cs239-caching 2025-03-03 22:19:51 -08:00
HiccupHan
f737271303 fix for sampling issue 2025-03-03 22:19:49 -08:00
Derek Wang
32ac22806c Fixed POST requests in system 2025-03-03 19:29:55 -08:00
Arthur Lu
8430009f8c update order in varied workload 2025-03-03 22:50:23 +00:00
Arthur Lu
8cb333b305 fix weighted friend test naming 2025-03-03 22:30:43 +00:00
Arthur Lu
b28decfc91 fix sieve naming 2025-03-03 22:29:45 +00:00
PurvaG1700
d57069856f [ADD]- Added end to end varied workloads 2025-03-02 23:49:26 -08:00
Derek Wang
72b72a949f Added ReadAfterWrite 2025-03-02 19:41:55 -08:00
Arthur Lu
35ea5a234f add no cache and ideal cache,
move printing cache reports to util file
2025-03-02 21:22:31 +00:00
Arthur Lu
50b5ea0acd update configs for new database size,
add simulated 10ms delay to database,
updated test script naming to better convey the experiment method
2025-03-01 00:35:51 +00:00
HiccupHan
3d40e4cc6f new data 2025-02-28 12:24:54 -08:00
HiccupHan
9699111c11 Merge branch 'main' of https://github.com/ltcptgeneral/cs239-caching 2025-02-28 12:15:18 -08:00
HiccupHan
ebf531f586 new data 2025-02-28 12:15:11 -08:00
root
6b082e0fea Merge branch 'main' of https://github.com/ltcptgeneral/cs239-caching 2025-02-28 20:12:33 +00:00
Arthur Lu
e5d8d3c0cf add basic data generation 2025-02-28 20:12:23 +00:00
HiccupHan
1c30154aa7 Merge branch 'main' of https://github.com/ltcptgeneral/cs239-caching 2025-02-28 12:03:46 -08:00
HiccupHan
33d9c0b716 added data 2025-02-28 12:03:38 -08:00
Arthur Lu
c3340e037d improve tiered cache with return to l1 mechanic 2025-02-28 19:21:53 +00:00
Arthur Lu
f7903f4fea add individual config files for each strategy,
config file path can be specified as the second argument
2025-02-28 19:17:44 +00:00
HiccupHan
e9b1128826 prefetch test 2025-02-25 13:02:48 -08:00
isha28-uclaCS
a276151e0c Seive Testing 2025-02-24 22:04:47 -08:00
HiccupHan
f475991f50 randomize friends further 2025-02-24 17:47:07 -08:00
Arthur Lu
9dd93d96f2 Merge pull request #1 from ltcptgeneral/metrics
add throughput and hit ratio metrics
2025-02-23 11:18:56 -08:00
HiccupHan
5dc9a99a3a Add to db instead of creating new one 2025-02-22 19:57:04 -08:00
35 changed files with 24996 additions and 81 deletions

2
.gitignore vendored
View File

@@ -7,6 +7,7 @@ __pycache__/
# Ignore database files (TinyDB JSON) # Ignore database files (TinyDB JSON)
database.json database.json
temp_DB.json
# Ignore environment variables file (if used) # Ignore environment variables file (if used)
.env .env
@@ -22,4 +23,3 @@ database.json
# Ignore MacOS system files # Ignore MacOS system files
.DS_Store .DS_Store

View File

@@ -8,7 +8,7 @@ class Node:
self.next = None self.next = None
self.prev = None self.prev = None
class SeiveCache(Cache): class SieveCache(Cache):
def __init__(self, limit: int): def __init__(self, limit: int):
super().__init__(limit) super().__init__(limit)
self.limit = limit # Fix: Store limit properly self.limit = limit # Fix: Store limit properly
@@ -18,13 +18,13 @@ class SeiveCache(Cache):
self.hand = None self.hand = None
def print_cache_state(self): def print_cache_state(self):
print("Current cache state:") #print("Current cache state:")
node = self.head node = self.head
if not node: if not node:
print("Cache is empty.") #print("Cache is empty.")
return return
for _ in range(len(self.cache)): for _ in range(len(self.cache)):
print(f"Key: {node.key}, Value: {node.value}, Visited: {node.visited}") #print(f"Key: {node.key}, Value: {node.value}, Visited: {node.visited}")
node = node.next node = node.next
if node == self.head: if node == self.head:
break break
@@ -33,20 +33,17 @@ class SeiveCache(Cache):
if key in self.cache: if key in self.cache:
node = self.cache[key] node = self.cache[key]
node.visited = True node.visited = True
print(f"GET {key}: {node.value}") #self.print_cache_state()
self.print_cache_state()
return node.value return node.value
print(f"GET {key}: MISS")
self.print_cache_state() self.print_cache_state()
return None return None
def put(self, key: str, val: str) -> bool: def put(self, key: str, val: str) -> bool:
print(f"PUT {key}: {val}")
if key in self.cache: if key in self.cache:
node = self.cache[key] node = self.cache[key]
node.value = val node.value = val
node.visited = True node.visited = True
self.print_cache_state() #self.print_cache_state()
return False # No eviction needed return False # No eviction needed
new_node = Node(key, val) new_node = Node(key, val)
@@ -66,11 +63,10 @@ class SeiveCache(Cache):
self.cache[key] = new_node self.cache[key] = new_node
if not self.hand: if not self.hand:
self.hand = self.head self.hand = self.head
self.print_cache_state() #self.print_cache_state()
return False return False
def invalidate(self, key: str) -> bool: def invalidate(self, key: str) -> bool:
print(f"INVALIDATE {key}")
if key in self.cache: if key in self.cache:
node = self.cache.pop(key) node = self.cache.pop(key)
if node == self.head: if node == self.head:
@@ -81,16 +77,14 @@ class SeiveCache(Cache):
node.next.prev = node.prev node.next.prev = node.prev
if node.prev: if node.prev:
node.prev.next = node.next node.prev.next = node.next
self.print_cache_state() #self.print_cache_state()
return True return True
print("INVALIDATE FAILED: Key not found")
return False return False
def next_hand(self): def next_hand(self):
self.hand = self.hand.next if self.hand.next else self.head self.hand = self.hand.next if self.hand.next else self.head
def evict(self): def evict(self):
print("EVICTION START")
while self.hand.visited: while self.hand.visited:
self.hand.visited = False self.hand.visited = False
self.next_hand() self.next_hand()
@@ -107,8 +101,7 @@ class SeiveCache(Cache):
obj_to_evict.prev.next = obj_to_evict.next obj_to_evict.prev.next = obj_to_evict.next
del self.cache[obj_to_evict.key] del self.cache[obj_to_evict.key]
print(f"EVICTED {obj_to_evict.key}") #self.print_cache_state()
self.print_cache_state()
# Basic API demo for future testing # Basic API demo for future testing
if __name__ == "__main__": if __name__ == "__main__":

16
app/cache/idealcache.py vendored Normal file
View File

@@ -0,0 +1,16 @@
from .cache import Cache
from database import get_user_profile
class IdealCache(Cache):
def __init__(self, limit: int):
pass
def get(self, key):
return get_user_profile(key)
def put(self, key, val):
return False
def invalidate(self, key):
return False

15
app/cache/nocache.py vendored Normal file
View File

@@ -0,0 +1,15 @@
from .cache import Cache
class NoCache(Cache):
def __init__(self, limit: int):
pass
def get(self, key):
return None
def put(self, key, val):
return False
def invalidate(self, key):
return False

View File

@@ -1,11 +1,29 @@
from .cache import BaselineCache from .cache import Cache
from database import get_user_profile, get_friends
from collections import OrderedDict
import math
class PrefetchCache(BaselineCache): class PrefetchCache(Cache):
key_relations = None limit = None
cache = None
def __init__(self): def __init__(self, limit):
super() super()
self.key_relations = dict() self.limit = limit
self.cache = OrderedDict()
def __eq__(self, other):
return self.cache == other
def __len__(self):
return len(self.cache)
def get(self, key: str) -> str:
if key in self.cache:
self.cache.move_to_end(key)
return self.cache[key]
else:
return None
def put(self, key: str, val: str) -> bool: def put(self, key: str, val: str) -> bool:
# LRU evict # LRU evict
@@ -14,16 +32,26 @@ class PrefetchCache(BaselineCache):
self.cache.popitem(last = False) self.cache.popitem(last = False)
evict = True evict = True
self.cache[key] = val self.cache[key] = val
self.prefetch(key, val) if self.prefetch(val):
evict = True
return evict return evict
def prefetch(self, key: str, val: str) -> bool: def prefetch(self, profile) -> bool:
if len(self.cache) >= self.limit and key in self.key_relations: evict = False
self.cache[self.key_relations[key][0]] = self.key_relations[key][1] friends_prof = get_friends(profile["user_id"], math.ceil(self.limit*0.1))
for i in friends_prof:
if len(self.cache) >= self.limit:
self.cache.popitem(last = False)
evict = True
self.cache[i] = friends_prof[i]
return evict
def invalidate(self, key: str) -> bool:
# basic delete invalidation, no (p)refetching
if key in self.cache:
del self.cache[key]
return True return True
else:
return False return False
def set_relations(self, key: str, related_key: str, related_val: str):
self.key_relations[key] = related_key | related_val
return

20
app/cache/read_after_write_cache.py vendored Normal file
View File

@@ -0,0 +1,20 @@
from .cache import BaselineCache
from database import get_user_profile
class ReadAfterWriteCache(BaselineCache):
def __init__(self, limit):
super().__init__( limit )
def invalidate(self, key: str) -> bool:
# basic delete invalidation, but after writing, we immediately read the value and add it to the cache
invalidated = False
if key in self.cache:
del self.cache[key]
invalidated = True
newData = get_user_profile( key )
self.put( key, newData )
return invalidated

View File

@@ -21,6 +21,11 @@ class TieredCache(BaselineCache):
f = open(self.l2_map[key], "r") f = open(self.l2_map[key], "r")
v = f.read() v = f.read()
f.close() f.close()
# we will also preemptively promote the value from l2 back to l1:
del self.l2_map[key]
self.put(key, v)
return v return v
else: # otherwise its a cache miss and return None else: # otherwise its a cache miss and return None
return None return None

View File

@@ -1,7 +1,8 @@
import os import os
import yaml import yaml
import sys
CONFIG_FILE = "config.yaml" CONFIG_FILE = sys.argv[1]
def load_config(): def load_config():
with open(CONFIG_FILE, "r") as f: with open(CONFIG_FILE, "r") as f:

View File

@@ -1,4 +0,0 @@
cache_strategy: "Tiered" # Change this to "Prefetch" or "Tiered" or "Seive"
cache_limit: 10
l2_cache_limit: 100
db_file: "llmData_sns.json" # Change this to the name of any json file within the "database/datastore" folder

4
app/config_baseline.yaml Normal file
View File

@@ -0,0 +1,4 @@
cache_strategy: "Baseline"
cache_limit: 50
l2_cache_limit: 100 # unused
db_file: "llmData_sns.json" # Change this to the name of any json file within the "database/datastore" folder

4
app/config_ideal.yaml Normal file
View File

@@ -0,0 +1,4 @@
cache_strategy: "Ideal"
cache_limit: 50
l2_cache_limit: 100 # unused
db_file: "llmData_sns.json" # Change this to the name of any json file within the "database/datastore" folder

4
app/config_nocache.yaml Normal file
View File

@@ -0,0 +1,4 @@
cache_strategy: "None"
cache_limit: 50
l2_cache_limit: 100 # unused
db_file: "llmData_sns.json" # Change this to the name of any json file within the "database/datastore" folder

4
app/config_prefetch.yaml Normal file
View File

@@ -0,0 +1,4 @@
cache_strategy: "Prefetch"
cache_limit: 50
l2_cache_limit: 100 # unused
db_file: "llmData_sns.json" # Change this to the name of any json file within the "database/datastore" folder

View File

@@ -0,0 +1,4 @@
cache_strategy: "ReadAfterWrite"
cache_limit: 50
l2_cache_limit: 100 # unused
db_file: "llmData_sns.json" # Change this to the name of any json file within the "database/datastore" folder

4
app/config_sieve.yaml Normal file
View File

@@ -0,0 +1,4 @@
cache_strategy: "Sieve"
cache_limit: 50
l2_cache_limit: 100 # unused
db_file: "llmData_sns.json" # Change this to the name of any json file within the "database/datastore" folder

4
app/config_tiered.yaml Normal file
View File

@@ -0,0 +1,4 @@
cache_strategy: "Tiered"
cache_limit: 25
l2_cache_limit: 175
db_file: "llmData_sns.json" # Change this to the name of any json file within the "database/datastore" folder

View File

@@ -1,24 +1,53 @@
from tinydb import TinyDB, Query from tinydb import TinyDB, Query
from config import DB_FILE from config import DB_FILE
import shutil
import random
DB_LOCATION = "database/datastore/" + DB_FILE DB_LOCATION = "database/datastore/" + DB_FILE
# Initialize TinyDB as a NoSQL key-value store # Initialize TinyDB as a NoSQL key-value store
db = TinyDB(DB_LOCATION) # We don't want to change our main DB file, so we will make a temp DB file and use that as our DB file
shutil.copyfile( DB_LOCATION, "temp_DB.json" )
db = TinyDB("temp_DB.json")
User = Query() User = Query()
def get_user_ids(): def get_user_ids():
return [x["user_id"] for x in db.all()] return [x["user_id"] for x in db.all()]
def get_user_friend_ids():
user_friends = {}
for x in db.all():
user_friends[x["user_id"]] = x["friends"]
return user_friends
def get_friends(user_id, num_friends):
    """Fetch the profiles of up to ``num_friends`` friends of ``user_id``.

    Args:
        user_id: ID of the user whose friends should be looked up.
        num_friends: Maximum number of friend profiles to return.

    Returns:
        A dict mapping each sampled friend ID to that friend's own profile
        record. Empty when the user is unknown or has no friends. Friend IDs
        that have no record in the database are skipped.
    """
    curr_user = db.search(User.user_id == user_id)
    if not curr_user:
        return {}

    # A stored profile may lack a friends list (or hold None); treat as empty.
    friend_ids = curr_user[0].get("friends") or []
    sample_size = max(0, min(num_friends, len(friend_ids)))

    # Use a private generator seeded with 0 so the selection stays
    # deterministic (same picks as seeding the module-level generator with 0)
    # without resetting the process-wide random state on every call.
    rng = random.Random(0)

    friends = {}
    for f in rng.sample(friend_ids, sample_size):
        # Look up the friend's record, not the requesting user's: the
        # previous query used user_id here, so every friend key was mapped
        # to the requester's own profile.
        match = db.search(User.user_id == f)
        if match:
            friends[f] = match[0]
    return friends
def get_user_profile(user_id): def get_user_profile(user_id):
"""Fetch user profile from TinyDB""" """Fetch user profile from TinyDB"""
result = db.search(User.user_id == user_id) result = db.search(User.user_id == user_id)
return result[0] if result else None return result[0] if result else None
def update_user_profile(user_id, name, followers, bio, posts, friends): def update_user_profile( data ):
"""Update user profile in TinyDB""" """Update user profile in TinyDB"""
db.upsert({"user_id": user_id, "name": name, "followers": followers, "bio": bio, "posts": posts, "friends": friends}, User.user_id == user_id) user_id = str( data["user_id"] )
# Basically make sure friends stay the same (for prefetching). Not great implementation, but it works
curr_user = db.search(User.user_id == user_id)
if( curr_user and data["friends"] == None ):
data["friends"] = curr_user[0]["friends"]
db.upsert( data, User.user_id == user_id )
def init_db(): def init_db():
"""Ensure TinyDB is initialized before FastAPI starts and prepopulate some data""" """Ensure TinyDB is initialized before FastAPI starts and prepopulate some data"""
@@ -28,7 +57,7 @@ def init_db():
# Prepopulate database with some sample users if empty # Prepopulate database with some sample users if empty
if len(db) == 0: if len(db) == 0:
db.insert_multiple([ db.insert_multiple([
{"user_id": "1", "name": "Alice", "followers": 100, "bio": "Love coding!", "posts": "Hello, world!"}, {"user_id": "1", "name": "Alice", "followers": 100, "bio": "Love coding!", "posts": "Hello, world!", "friends": ["2"]},
{"user_id": "2", "name": "Bob", "followers": 200, "bio": "Tech enthusiast", "posts": "AI is amazing!"}, {"user_id": "2", "name": "Bob", "followers": 200, "bio": "Tech enthusiast", "posts": "AI is amazing!","friends": ["3", "1"]},
{"user_id": "3", "name": "Charlie", "followers": 50, "bio": "Blogger", "posts": "Check out my latest post!"} {"user_id": "3", "name": "Charlie", "followers": 50, "bio": "Blogger", "posts": "Check out my latest post!", "friends": ["1"]}
]) ])

File diff suppressed because it is too large Load Diff

View File

@@ -6,6 +6,10 @@ warnings.filterwarnings('ignore')
import re import re
import random import random
import json import json
from tinydb import TinyDB
from tinydb.storages import JSONStorage
from tinydb.middlewares import CachingMiddleware
import math
HUGGINGFACEHUB_API_TOKEN = None HUGGINGFACEHUB_API_TOKEN = None
os.environ["HUGGINGFACEHUB_API_TOKEN"] = HUGGINGFACEHUB_API_TOKEN os.environ["HUGGINGFACEHUB_API_TOKEN"] = HUGGINGFACEHUB_API_TOKEN
@@ -21,13 +25,11 @@ def parse_profile(text, user_id, num_users):
match = re.search(r"([A-Za-z ]+)\|([A-Za-z &\-!]+)\|([A-Za-z .',!?&\-]+)", text) match = re.search(r"([A-Za-z ]+)\|([A-Za-z &\-!]+)\|([A-Za-z .',!?&\-]+)", text)
name, bio, posts = match.groups() name, bio, posts = match.groups()
# Generate mock followers count (randomized for realism)
followers = random.randint(10, 5000) followers = random.randint(10, 5000)
# Generate mock friends (users with nearby IDs) friend_ids = [str(fid) for fid in range(user_id) if fid != user_id]
friend_ids = [str(fid) for fid in range(num_users) if fid != user_id]
random.shuffle(friend_ids) random.shuffle(friend_ids)
friends = friend_ids[:random.randint(1, num_users-1)] # Each user gets 1-5 friends friends = friend_ids[:random.randint(1, min(100, math.ceil(num_users/3)))]
return { return {
"user_id": str(user_id), "user_id": str(user_id),
@@ -38,7 +40,7 @@ def parse_profile(text, user_id, num_users):
"friends": friends "friends": friends
} }
def generate_data(num_users): def generate_data(base_id, num_users):
system_message = """You are a data generator creating user profiles for a social media app. system_message = """You are a data generator creating user profiles for a social media app.
Always provide user profiles in this format: Name | Interest | Recent Activity. Always provide user profiles in this format: Name | Interest | Recent Activity.
Do not include numbers, IDs, or assistant labels. Only return a properly formatted response. Do not include numbers, IDs, or assistant labels. Only return a properly formatted response.
@@ -60,25 +62,30 @@ def generate_data(num_users):
huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN, huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
) )
llm_chain = prompt | llm llm_chain = prompt | llm
data = {} data = []
i = 0 i = base_id
user_id = 0 user_id = 0
while user_id < num_users: while user_id < num_users:
raw_text = llm_chain.invoke({"user_id": i}) raw_text = llm_chain.invoke({"user_id": i})
while not valid_data(raw_text): while not valid_data(raw_text):
i = i + 1 i = i + 1
raw_text = llm_chain.invoke({"user_id": i}) raw_text = llm_chain.invoke({"user_id": i})
user_profile = parse_profile(raw_text, user_id, num_users) user_profile = parse_profile(raw_text, base_id + user_id, num_users)
user_id = user_id + 1 user_id = user_id + 1
data[user_id] = user_profile i = i + 1
data.append(user_profile)
return data return data
if __name__ == "__main__": if __name__ == "__main__":
data = generate_data(100) base_id = input("Enter base id (check db to find the next consecutive user_id): ")
num_users = input("Enter number of users to generate: ")
data = generate_data(int(base_id), int(num_users))
# Create json file # Create json file
json_object = json.dumps( {"_default": data}, indent=4 ) file_path = "datastore/llmData_sns.json"
with open( "datastore/llmData_sns.json", "w" ) as f: global db
f.write( json_object ) db = TinyDB(file_path, storage=CachingMiddleware(JSONStorage), indent=4)
db.insert_multiple(data)
db.close()

View File

@@ -0,0 +1,33 @@
import random, string
from tinydb import TinyDB
from tinydb.storages import JSONStorage
from tinydb.middlewares import CachingMiddleware
def randomstring(length):
    """Return a random string of ``length`` lowercase ASCII letters."""
    letters = string.ascii_lowercase
    return ''.join(random.choice(letters) for _ in range(length))


def generate_data(base_id, num_users):
    """Generate ``num_users`` synthetic user profiles starting at ``base_id``.

    Args:
        base_id: Numeric ID assigned to the first generated user; following
            users get consecutive IDs.
        num_users: Number of profiles to generate.

    Returns:
        A list of profile dicts with the keys ``user_id``, ``name``,
        ``followers``, ``bio``, ``posts`` and ``friends``. ``user_id`` and
        every entry of ``friends`` are strings.
    """
    data = []
    for i in range(num_users):
        profile = {
            "user_id": str(base_id + i),
            "name": randomstring(10),
            "followers": random.randint(1, 10000),
            "bio": randomstring(250),
            "posts": randomstring(100),
        }

        # Friends are drawn from the same pool of earlier IDs as before.
        # The pool is empty for the very first users, which previously made
        # random.choices() raise IndexError.
        candidates = range(max(base_id + i - 1, 0))
        wanted = min(random.randint(1, 50), len(candidates))
        # Sample without replacement so a friend is never listed twice, and
        # store IDs as strings so they match the "user_id" field.
        profile["friends"] = [str(fid) for fid in random.sample(candidates, wanted)]

        data.append(profile)
    return data
base_id = input("Enter base id (check db to find the next consecutive user_id): ")
num_users = input("Enter number of users to generate: ")
data = generate_data(int(base_id), int(num_users))
# Create json file
file_path = "datastore/llmData_sns.json"
global db
db = TinyDB(file_path, storage=CachingMiddleware(JSONStorage), indent=4)
db.insert_multiple(data)
db.close()

View File

@@ -1,23 +1,40 @@
from fastapi import FastAPI, HTTPException from fastapi import FastAPI, HTTPException
from database import get_user_ids, get_user_profile, update_user_profile from database import get_user_ids, get_user_profile, update_user_profile, get_user_friend_ids
from cache.cache import BaselineCache from cache.cache import BaselineCache
from cache.prefetch_cache import PrefetchCache from cache.prefetch_cache import PrefetchCache
from cache.tiered_cache import TieredCache from cache.tiered_cache import TieredCache
from cache.eviction_seive import SeiveCache from cache.eviction_sieve import SieveCache
from cache.nocache import NoCache
from cache.idealcache import IdealCache
from cache.read_after_write_cache import ReadAfterWriteCache
from config import CACHE_STRATEGY, CACHE_LIMIT, L2_CACHE_LIMIT from config import CACHE_STRATEGY, CACHE_LIMIT, L2_CACHE_LIMIT
from models.models import User
import time import time
app = FastAPI() app = FastAPI()
# Initialize cache based on strategy from config.yaml or environment variable # Initialize cache based on strategy from config.yaml or environment variable
if CACHE_STRATEGY == "Baseline": if CACHE_STRATEGY == "Baseline":
print("Using baseline cache strategy")
cache = BaselineCache(limit=CACHE_LIMIT) cache = BaselineCache(limit=CACHE_LIMIT)
elif CACHE_STRATEGY == "Prefetch": elif CACHE_STRATEGY == "Prefetch":
print("Using prefetch cache strategy")
cache = PrefetchCache(limit=CACHE_LIMIT) cache = PrefetchCache(limit=CACHE_LIMIT)
elif CACHE_STRATEGY == "Tiered": elif CACHE_STRATEGY == "Tiered":
print("Using tiered cache strategy")
cache = TieredCache(limit=CACHE_LIMIT, l2_limit=L2_CACHE_LIMIT) cache = TieredCache(limit=CACHE_LIMIT, l2_limit=L2_CACHE_LIMIT)
elif CACHE_STRATEGY == "Seive": elif CACHE_STRATEGY == "Sieve":
cache = SeiveCache(limit=CACHE_LIMIT) print("Using sieve cache strategy")
cache = SieveCache(limit=CACHE_LIMIT)
elif CACHE_STRATEGY == "None":
print("Using no cache strategy")
cache = NoCache(limit=CACHE_LIMIT)
elif CACHE_STRATEGY == "Ideal":
print("Using ideal cache strategy")
cache = IdealCache(limit=CACHE_LIMIT)
elif CACHE_STRATEGY == "ReadAfterWrite":
print("Using read-after-write cache strategy")
cache = ReadAfterWriteCache(limit=CACHE_LIMIT)
else: else:
raise ValueError(f"Invalid CACHE_STRATEGY: {CACHE_STRATEGY}") raise ValueError(f"Invalid CACHE_STRATEGY: {CACHE_STRATEGY}")
@@ -25,6 +42,10 @@ else:
def fetch_user_ids(): def fetch_user_ids():
return {"ids": get_user_ids()} return {"ids": get_user_ids()}
@app.get("/users_and_friends")
def fetch_user_and_friends():
return get_user_friend_ids()
@app.get("/user/{user_id}") @app.get("/user/{user_id}")
def fetch_user_profile(user_id: str): def fetch_user_profile(user_id: str):
"""Fetch user profile with caching""" """Fetch user profile with caching"""
@@ -34,6 +55,7 @@ def fetch_user_profile(user_id: str):
return {"user_id": user_id, "profile": cached_profile, "source": "cache", "time_ms": (time.time() - start) * 1000} return {"user_id": user_id, "profile": cached_profile, "source": "cache", "time_ms": (time.time() - start) * 1000}
profile = get_user_profile(user_id) profile = get_user_profile(user_id)
time.sleep(10 / 1000) # simulate 10 ms db delay, we do this here instead of the actual db in the ideal cache case
if profile is None: if profile is None:
raise HTTPException(status_code=404, detail="User not found") raise HTTPException(status_code=404, detail="User not found")
@@ -41,8 +63,11 @@ def fetch_user_profile(user_id: str):
return {"user_id": user_id, "profile": profile, "source": "database", "time_ms": (time.time() - start) * 1000} return {"user_id": user_id, "profile": profile, "source": "database", "time_ms": (time.time() - start) * 1000}
@app.post("/update_user/") @app.post("/update_user/")
def modify_user_profile(user_id: str, name: str, followers: int, bio: str, posts: str, friends: list[str]): async def modify_user_profile(user_data : User):
"""Update user profile and refresh cache""" """Update user profile and refresh cache"""
update_user_profile(user_id, name, followers, bio, posts, friends) user_id=user_data.user_id
user_dict = user_data.dict()
update_user_profile(user_dict)
cache.invalidate(user_id) # Invalidate old cache cache.invalidate(user_id) # Invalidate old cache
return {"message": "User profile updated successfully"} return {"message": "User profile updated successfully"}

9
app/models/models.py Normal file
View File

@@ -0,0 +1,9 @@
from pydantic import BaseModel
class User(BaseModel):
user_id: str
name: str | None = None
followers: int | None = None
bio: str | None = None
posts: str | None = None
friends: list | None = None

View File

@@ -3,3 +3,4 @@ uvicorn
tinydb tinydb
pyyaml pyyaml
requests requests
tqdm

View File

@@ -0,0 +1,71 @@
import requests
import random
import json
from tqdm import tqdm
import time
from collections import deque
from utils import print_report
baseurl = "http://localhost:8000"
endpoints = {
"/user/{user_id}": 0.5, # 50% read operations
"/update_user/?user_id={user_id}&name=Test&followers=100&bio=Updated&posts=Updated": 0.5 # 50% write operations
}
# Fetch all user IDs
user_ids = json.loads(requests.get(baseurl + "/users").content)["ids"]
random.seed(0)
prev_updated_users = deque()
def generate_random():
"""Randomly generate a read or write request, favoring cache hits."""
endpoint = random.choices(list(endpoints.keys()), list(endpoints.values()))[0]
# Reads
if endpoint == "/user/{user_id}":
# Favor frequently accessed user IDs to increase hit ratio
if( prev_updated_users ):
random_user = str(random.choice(prev_updated_users)) if random.random() < 0.7 else str(random.choice(user_ids))
else:
random_user = str(random.choice(user_ids))
return baseurl + endpoint.replace("{user_id}", random_user)
# Writes
else:
random_user = str(random.choice(user_ids))
prev_updated_users.append( random_user )
if( len( prev_updated_users ) > 10 ):
prev_updated_users.popleft()
return random_user
times = []
hits = []
start = time.time()
for i in tqdm(range(10000)):
url = generate_random()
if( "user" not in url ):
write_obj = { "user_id":url,"name": "Test", "followers":"100","bio":"updated","posts":"updated"}
response = requests.post("http://localhost:8000/update_user/", json = write_obj)
else:
response = requests.get(url)
try:
content = json.loads(response.content)
if "time_ms" in content: # Only process if "time_ms" exists
times.append(content["time_ms"])
hits.append(content["source"] == "cache")
except json.JSONDecodeError:
print(f"Error decoding JSON: {response.content}")
exit(1)
except KeyError:
print(f"Unexpected response format: {content}")
exit(1)
end = time.time()
print(f"\n--- Results ---")
print_report(hits, times, end - start)

View File

@@ -5,6 +5,7 @@ import random
import json import json
from tqdm import tqdm from tqdm import tqdm
import time import time
from utils import print_report
baseurl = "http://localhost:8000" baseurl = "http://localhost:8000"
@@ -36,21 +37,4 @@ for i in tqdm(range(10000)):
hits.append(content["source"] == "cache") hits.append(content["source"] == "cache")
end = time.time() end = time.time()
hits_count = sum(hits) print_report(hits, times, end - start)
miss_count = len(hits) - hits_count
hits_time = 0
miss_time = 0
for i in range(len(times)):
if hits[i]:
hits_time += times[i]
else:
miss_time += times[i]
total_time = hits_time + miss_time
print(f"hits: {hits_count} misses: {miss_count} ratio: { hits_count / (hits_count + miss_count)}")
print(f"average response time (ms) : {total_time / len(times)}")
print(f"average cache hit response time (ms) : {hits_time / hits_count}")
print(f"average cache miss response time (ms): {miss_time / miss_count}")
print(f"cache throughput (requests / ms) : { len(times) / total_time}")
print(f"real throughput (requests / ms) : { len(times) / (end - start) / 1000}")

48
tests/results_baseline Normal file
View File

@@ -0,0 +1,48 @@
--- random_read Results ---
hits: 507 misses: 9493 ratio: 0.0507
average response time (ms) : 12.171702599525451
average cache hit response time (ms) : 0.0013355202458549063
average cache miss response time (ms): 12.821694815810583
cache throughput (requests / s) : 82.15777470926605
real throughput (requests / s) : 75.03700407782604
--- read_heavy Results ---
hits: 394 misses: 7631 ratio: 0.04909657320872274
average response time (ms) : 12.201453324790313
average cache hit response time (ms) : 0.0013572915556467125
average cache miss response time (ms): 12.831362620701
cache throughput (requests / s) : 81.95744993493923
real throughput (requests / s) : 73.58096764800857
--- write_heavy Results ---
hits: 112 misses: 1906 ratio: 0.05550049554013875
average response time (ms) : 12.256505940666992
average cache hit response time (ms) : 0.0013560056686401367
average cache miss response time (ms): 12.976640669271303
cache throughput (requests / s) : 81.58932120140436
real throughput (requests / s) : 58.17744310065741
--- frequent_users Results ---
hits: 7096 misses: 2904 ratio: 0.7096
average response time (ms) : 3.7240190267562867
average cache hit response time (ms) : 0.000715490930378773
average cache miss response time (ms): 12.822008658374966
cache throughput (requests / s) : 268.52709205168185
real throughput (requests / s) : 211.60014253369587
--- frequent_after_write Results ---
hits: 1068 misses: 3862 ratio: 0.21663286004056795
average response time (ms) : 9.787649653746197
average cache hit response time (ms) : 0.0015231553981366675
average cache miss response time (ms): 12.493911461160938
cache throughput (requests / s) : 102.16957445113012
real throughput (requests / s) : 48.726267135940624
--- weighted_friend_readonly Results ---
hits: 1275 misses: 8725 ratio: 0.1275
average response time (ms) : 11.204515027999879
average cache hit response time (ms) : 0.0013659982120289521
average cache miss response time (ms): 12.841651419172313
cache throughput (requests / s) : 89.249735263063
real throughput (requests / s) : 80.8252530462673

48
tests/results_ideal Normal file
View File

@@ -0,0 +1,48 @@
--- random_read Results ---
hits: 10000 misses: 0 ratio: 1.0
average response time (ms) : 2.7374683618545532
average cache hit response time (ms) : 2.7374683618545532
average cache miss response time (ms): N/A
cache throughput (requests / s) : 365.3010255514076
real throughput (requests / s) : 264.62084114988136
--- read_heavy Results ---
hits: 8025 misses: 0 ratio: 1.0
average response time (ms) : 2.3531507034539434
average cache hit response time (ms) : 2.3531507034539434
average cache miss response time (ms): N/A
cache throughput (requests / s) : 424.96215755846185
real throughput (requests / s) : 174.59831308637462
--- write_heavy Results ---
hits: 2018 misses: 0 ratio: 1.0
average response time (ms) : 2.5427952275876367
average cache hit response time (ms) : 2.5427952275876367
average cache miss response time (ms): N/A
cache throughput (requests / s) : 393.26800253149185
real throughput (requests / s) : 24.773972578001025
--- frequent_users Results ---
hits: 10000 misses: 0 ratio: 1.0
average response time (ms) : 1.3468331575393677
average cache hit response time (ms) : 1.3468331575393677
average cache miss response time (ms): N/A
cache throughput (requests / s) : 742.4824629555278
real throughput (requests / s) : 424.7398305196224
--- frequent_after_write Results ---
hits: 4930 misses: 0 ratio: 1.0
average response time (ms) : 2.266260528177567
average cache hit response time (ms) : 2.266260528177567
average cache miss response time (ms): N/A
cache throughput (requests / s) : 441.25553420116205
real throughput (requests / s) : 77.73575713810418
--- weighted_friend_readonly Results ---
hits: 10000 misses: 0 ratio: 1.0
average response time (ms) : 2.7157030582427977
average cache hit response time (ms) : 2.7157030582427977
average cache miss response time (ms): N/A
cache throughput (requests / s) : 368.228771170237
real throughput (requests / s) : 267.4051580450448

48
tests/results_nocache Normal file
View File

@@ -0,0 +1,48 @@
--- random_read Results ---
hits: 0 misses: 10000 ratio: 0.0
average response time (ms) : 12.805271649360657
average cache hit response time (ms) : N/A
average cache miss response time (ms): 12.805271649360657
cache throughput (requests / s) : 78.09283765174385
real throughput (requests / s) : 71.53643320734811
--- read_heavy Results ---
hits: 0 misses: 8025 ratio: 0.0
average response time (ms) : 12.849865197392639
average cache hit response time (ms) : N/A
average cache miss response time (ms): 12.849865197392639
cache throughput (requests / s) : 77.82182806111534
real throughput (requests / s) : 70.04929255901189
--- write_heavy Results ---
hits: 0 misses: 2018 ratio: 0.0
average response time (ms) : 12.95277124826452
average cache hit response time (ms) : N/A
average cache miss response time (ms): 12.95277124826452
cache throughput (requests / s) : 77.20355596752974
real throughput (requests / s) : 55.53823196963498
--- frequent_users Results ---
hits: 0 misses: 10000 ratio: 0.0
average response time (ms) : 11.695257019996642
average cache hit response time (ms) : N/A
average cache miss response time (ms): 11.695257019996642
cache throughput (requests / s) : 85.50474763318087
real throughput (requests / s) : 77.81244515584922
--- frequent_after_write Results ---
hits: 0 misses: 4930 ratio: 0.0
average response time (ms) : 12.357432702008415
average cache hit response time (ms) : N/A
average cache miss response time (ms): 12.357432702008415
cache throughput (requests / s) : 80.92295739045157
real throughput (requests / s) : 43.34797667618127
--- weighted_friend_readonly Results ---
hits: 0 misses: 10000 ratio: 0.0
average response time (ms) : 12.757544946670532
average cache hit response time (ms) : N/A
average cache miss response time (ms): 12.757544946670532
cache throughput (requests / s) : 78.38498740786174
real throughput (requests / s) : 71.68974303163175

48
tests/results_prefetch Normal file
View File

@@ -0,0 +1,48 @@
--- random_read Results ---
hits: 499 misses: 9501 ratio: 0.0499
average response time (ms) : 12.209891486167908
average cache hit response time (ms) : 0.0013277860347159162
average cache miss response time (ms): 12.851094863324677
cache throughput (requests / s) : 81.90080977647176
real throughput (requests / s) : 74.83126189192693
--- read_heavy Results ---
hits: 405 misses: 7620 ratio: 0.05046728971962617
average response time (ms) : 11.846880363155377
average cache hit response time (ms) : 0.0013816503830897955
average cache miss response time (ms): 12.476463956157053
cache throughput (requests / s) : 84.41040757954049
real throughput (requests / s) : 65.28853446272649
--- write_heavy Results ---
hits: 95 misses: 1923 ratio: 0.04707631318136769
average response time (ms) : 12.044375786341808
average cache hit response time (ms) : 0.0017768458316200658
average cache miss response time (ms): 12.639303971130403
cache throughput (requests / s) : 83.02630354110914
real throughput (requests / s) : 20.054516595218445
--- frequent_users Results ---
hits: 5788 misses: 4212 ratio: 0.5788
average response time (ms) : 5.223854732513428
average cache hit response time (ms) : 0.0010156268981216525
average cache miss response time (ms): 12.400918536716038
cache throughput (requests / s) : 191.42951923528236
real throughput (requests / s) : 159.80528851176524
--- frequent_after_write Results ---
hits: 1047 misses: 3883 ratio: 0.2123732251521298
average response time (ms) : 9.824660583635373
average cache hit response time (ms) : 0.0011850337016662189
average cache miss response time (ms): 12.47343186892525
cache throughput (requests / s) : 101.78468675708436
real throughput (requests / s) : 48.92688928507726
--- weighted_friend_readonly Results ---
hits: 2838 misses: 7162 ratio: 0.2838
average response time (ms) : 9.23909306526184
average cache hit response time (ms) : 0.0012533357229091652
average cache miss response time (ms): 12.899661223937
cache throughput (requests / s) : 108.23573189882784
real throughput (requests / s) : 96.54757454253809

View File

@@ -0,0 +1,48 @@
--- random_read Results ---
hits: 505 misses: 9495 ratio: 0.0505
average response time (ms) : 12.217395067214966
average cache hit response time (ms) : 0.0012926536031288675
average cache miss response time (ms): 12.867119313541872
cache throughput (requests / s) : 81.85050859847135
real throughput (requests / s) : 74.74830604127537
--- read_heavy Results ---
hits: 394 misses: 7631 ratio: 0.04909657320872274
average response time (ms) : 12.254692267777392
average cache hit response time (ms) : 0.001333086623758229
average cache miss response time (ms): 12.887351620073884
cache throughput (requests / s) : 81.6013962773598
real throughput (requests / s) : 73.22107615138934
--- write_heavy Results ---
hits: 112 misses: 1906 ratio: 0.05550049554013875
average response time (ms) : 12.277225436729529
average cache hit response time (ms) : 0.001330460820879255
average cache miss response time (ms): 12.998579181378936
cache throughput (requests / s) : 81.45162806967119
real throughput (requests / s) : 57.99762893791816
--- frequent_users Results ---
hits: 7096 misses: 2904 ratio: 0.7096
average response time (ms) : 3.7444978952407837
average cache hit response time (ms) : 0.0007732812285826467
average cache miss response time (ms): 12.8923869658436
cache throughput (requests / s) : 267.05850236182243
real throughput (requests / s) : 210.51762371076964
--- frequent_after_write Results ---
hits: 3509 misses: 1421 ratio: 0.711764705882353
average response time (ms) : 3.600564883156674
average cache hit response time (ms) : 0.0013591693520511987
average cache miss response time (ms): 12.488399400919109
cache throughput (requests / s) : 277.7341979526512
real throughput (requests / s) : 60.02808337161936
--- weighted_friend_readonly Results ---
hits: 1275 misses: 8725 ratio: 0.1275
average response time (ms) : 11.231861686706543
average cache hit response time (ms) : 0.0015234479717179841
average cache miss response time (ms): 12.872971286063208
cache throughput (requests / s) : 89.03243539613284
real throughput (requests / s) : 80.69348254832964

48
tests/results_tiered Normal file
View File

@@ -0,0 +1,48 @@
--- random_read Results ---
hits: 1966 misses: 8034 ratio: 0.1966
average response time (ms) : 10.438107824325561
average cache hit response time (ms) : 0.09135385010759076
average cache miss response time (ms): 12.97006180905453
cache throughput (requests / s) : 95.80280418923658
real throughput (requests / s) : 86.41953468377949
--- read_heavy Results ---
hits: 1563 misses: 6462 ratio: 0.19476635514018692
average response time (ms) : 10.474605560302734
average cache hit response time (ms) : 0.0937643984686619
average cache miss response time (ms): 12.98547754048637
cache throughput (requests / s) : 95.46898871207694
real throughput (requests / s) : 84.39105440656928
--- write_heavy Results ---
hits: 415 misses: 1603 ratio: 0.20564915758176414
average response time (ms) : 10.430269364204822
average cache hit response time (ms) : 0.0991062945630177
average cache miss response time (ms): 13.10489985322625
cache throughput (requests / s) : 95.87480103168338
real throughput (requests / s) : 64.8966729031306
--- frequent_users Results ---
hits: 7557 misses: 2443 ratio: 0.7557
average response time (ms) : 3.1802247285842897
average cache hit response time (ms) : 0.011482802872243217
average cache miss response time (ms): 12.982182457854012
cache throughput (requests / s) : 314.44318730430115
real throughput (requests / s) : 240.3280431399857
--- frequent_after_write Results ---
hits: 1284 misses: 3646 ratio: 0.26044624746450307
average response time (ms) : 9.321854902812966
average cache hit response time (ms) : 0.022931448024381358
average cache miss response time (ms): 12.596626629622769
cache throughput (requests / s) : 107.27478709180934
real throughput (requests / s) : 49.79781897651969
--- weighted_friend_readonly Results ---
hits: 4471 misses: 5529 ratio: 0.4471
average response time (ms) : 7.232451653480529
average cache hit response time (ms) : 0.0870680014470151
average cache miss response time (ms): 13.01053273654109
cache throughput (requests / s) : 138.26570130182094
real throughput (requests / s) : 120.26334295113243

26
tests/utils.py Normal file
View File

@@ -0,0 +1,26 @@
def print_report(hits, request_times, real_time):
    """Print hit/miss counts, latency averages and throughput for one run.

    Args:
        hits: Per-request flags; a truthy entry means the request was
            answered from the cache. Expected to run parallel to
            ``request_times``.
        request_times: Per-request response times in milliseconds.
        real_time: Wall-clock duration of the whole run in seconds.

    Any statistic whose denominator is zero (no requests, no hits, no
    misses, zero summed latency, zero wall-clock time) is printed as
    ``N/A`` instead of raising ``ZeroDivisionError``.
    """
    def average(total, count):
        # "N/A" stands in for an undefined average.
        return total / count if count else "N/A"

    hits_count = sum(hits)
    miss_count = len(hits) - hits_count
    request_count = len(request_times)

    # Accumulate with plain += so float results match earlier reports
    # exactly (sum() may use compensated summation on newer interpreters).
    hits_time = 0
    miss_time = 0
    for was_hit, elapsed in zip(hits, request_times):
        if was_hit:
            hits_time += elapsed
        else:
            miss_time += elapsed
    total_time = hits_time + miss_time

    hit_ratio = average(hits_count, hits_count + miss_count)
    # Latencies are in ms, hence the * 1000 to get requests per second.
    cache_throughput = request_count / total_time * 1000 if total_time else "N/A"
    real_throughput = request_count / real_time if real_time else "N/A"

    print(f"hits: {hits_count} misses: {miss_count} ratio: {hit_ratio}")
    print(f"average response time (ms) : {average(total_time, request_count)}")
    print(f"average cache hit response time (ms) : {average(hits_time, hits_count)}")
    print(f"average cache miss response time (ms): {average(miss_time, miss_count)}")
    print(f"cache throughput (requests / s) : {cache_throughput}")
    print(f"real throughput (requests / s) : {real_throughput}")

82
tests/varied_workload.py Normal file
View File

@@ -0,0 +1,82 @@
import requests
import random
import json
import time
from tqdm import tqdm
from utils import print_report
baseurl = "http://localhost:8000"

# Ask the server once, up front, for every user id and for the
# user -> friends mapping used by friend-based selection.
user_ids = json.loads(requests.get(f"{baseurl}/users").content)["ids"]
user_friends = json.loads(requests.get(f"{baseurl}/users_and_friends").content)

# Fixed seed so every run issues the same request sequence.
random.seed(0)

# Per-workload settings. "read" is the probability that a request is a GET;
# the optional "frequent_percent" / "friend_access" keys bias which user
# is targeted (see select_user).
workloads = {
    "random_read": {"read": 1.0, "write": 0.0},
    "read_heavy": {"read": 0.8, "write": 0.2},
    "write_heavy": {"read": 0.2, "write": 0.8},
    "frequent_users": {"read": 1.0, "write": 0.0, "frequent_percent": 0.7},
    #"frequent_after_write": {"read": 0.7, "write": 0.3},
    #"friend_based": {"read": 1.0, "write": 0.0, "friend_access": 0.7},
}

# The "frequent" population is simply the first 10 ids the server returned.
frequent_users = user_ids[:10]
def generate_request(workload, last_updated=None):
    """Build the next request for a workload.

    Returns a ``(url, payload, method)`` tuple: reads are ``GET`` requests
    for ``/user/<id>`` with no payload, writes are ``POST`` requests to
    ``/update_user/`` carrying the replacement profile as the payload.
    """
    # Draw the read/write decision before picking the user so the random
    # stream is consumed in a fixed order.
    is_read = random.random() < workload["read"]
    target = select_user(workload, last_updated)

    if is_read:
        return f"{baseurl}/user/{target}", None, "GET"

    # Write operation: overwrite the chosen user's profile.
    payload = {
        "user_id": target,
        "name": "UpdatedUser",
        "followers": "500",
        "bio": "Updated",
        "posts": "UpdatedPost",
    }
    return baseurl + "/update_user/", payload, "POST"
def select_user(workload, last_updated):
    """Select the user id that the next request should target.

    Args:
        workload: Workload settings. If it has ``frequent_percent``, a user
            from ``frequent_users`` is chosen with that probability. If it
            has ``friend_access``, a friend of ``last_updated`` is chosen
            with that probability.
        last_updated: Id of the most recently written user, or ``None``.

    Returns:
        An entry from ``frequent_users``, from the friend list of
        ``last_updated``, or from ``user_ids``.
    """
    if "frequent_percent" in workload and random.random() < workload["frequent_percent"]:
        return random.choice(frequent_users)
    if "friend_access" in workload and random.random() < workload["friend_access"]:
        # `or` covers both an unknown user and a user whose friend list is
        # empty; random.choice([]) would otherwise raise IndexError.
        candidates = user_friends.get(str(last_updated)) or user_ids
        return random.choice(candidates)
    return random.choice(user_ids)
def run_workload(name, workload, num_requests=10000):
    """Run one workload against the server and print its performance report.

    Args:
        name: Label used for the progress bar and the report header.
        workload: Settings dict handed to ``generate_request``.
        num_requests: How many requests to issue (defaults to 10000).

    Only responses whose JSON body contains ``time_ms`` are counted;
    responses that cannot be processed are printed and skipped.
    """
    times = []
    hits = []
    last_updated = None
    start = time.time()
    for _ in tqdm(range(num_requests), desc=f"Running {name}"):
        url, data, method = generate_request(workload, last_updated)
        # GET requests carry data=None, so json=None sends no body.
        response = requests.request(method, url, json=data)
        try:
            content = json.loads(response.content)
            if "time_ms" in content:
                # Read both fields before appending: a missing "source"
                # must not leave times and hits with different lengths.
                elapsed = content["time_ms"]
                from_cache = content["source"] == "cache"
                times.append(elapsed)
                hits.append(from_cache)
            if method == "POST":
                last_updated = content.get("user_id", last_updated)
        except (json.JSONDecodeError, KeyError):
            print(f"Error processing response: {response.content}")
    end = time.time()

    print(f"\n--- {name} Results ---")
    if not times:
        # Nothing was measured, so there are no statistics to compute.
        print("no timed responses recorded")
        return
    print_report(hits, times, end - start)
# Execute every configured workload, in the order it is declared.
for label, settings in workloads.items():
    run_workload(label, settings)

View File

@@ -0,0 +1,65 @@
import requests
import random
import json
from tqdm import tqdm
import time
from utils import print_report
baseurl = "http://localhost:8000"

# Endpoint templates mapped to their relative selection weight.
endpoints = {
    "/user/{user_id}": 0.8,  # 80% read operations
    "/update_user/?user_id={user_id}&name=Test&followers=100&bio=Updated&posts=Updated": 0.2,  # 20% write operations
}

# Ask the server for the full list of user ids.
user_ids = json.loads(requests.get(f"{baseurl}/users").content)["ids"]

# Fixed seed so every run issues the same request sequence.
random.seed(0)
def generate_random():
    """Randomly generate a read or write request URL, favoring cache hits.

    An endpoint template is drawn according to the weights in ``endpoints``.
    For reads, 70% of requests target the first 20% of ``user_ids`` and the
    rest target any user; writes always target any user.

    Returns:
        The full request URL with ``{user_id}`` filled in.

    Raises:
        IndexError: If ``user_ids`` is empty.
    """
    endpoint = random.choices(list(endpoints), weights=list(endpoints.values()))[0]

    pool = user_ids
    if endpoint == "/user/{user_id}":
        # First 20% of ids act as the frequently accessed users.
        frequent_users = user_ids[:int(len(user_ids) * 0.2)]
        # The slice is empty when there are fewer than 5 users; fall back
        # to the whole population instead of choosing from an empty list.
        # random.random() is drawn first to keep the random stream stable.
        if random.random() < 0.7 and frequent_users:
            pool = frequent_users

    random_user = str(random.choice(pool))
    return baseurl + endpoint.replace("{user_id}", random_user)
times = []
hits = []

# Warm-up phase
# for _ in tqdm(range(1000)):
#     url = generate_random()
#     requests.get(url)

# Main testing phase
# NOTE(review): every URL, including the /update_user/ ones, is sent with
# requests.get, and responses without "time_ms" are skipped silently.
# Confirm the server still accepts GET for updates.
start = time.time()
for _ in tqdm(range(10000)):
    url = generate_random()
    response = requests.get(url)
    try:
        content = json.loads(response.content)
        if "time_ms" in content:  # Only process if "time_ms" exists
            times.append(content["time_ms"])
            hits.append(content["source"] == "cache")
    except json.JSONDecodeError:
        print(f"Error decoding JSON: {response.content}")
        # exit() comes from the site module and is meant for interactive
        # use; SystemExit works wherever the script is run.
        raise SystemExit(1)
    except KeyError:
        print(f"Unexpected response format: {content}")
        raise SystemExit(1)
end = time.time()

print_report(hits, times, end - start)

View File

@@ -0,0 +1,47 @@
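# Tests latency and hit rate of the /user endpoint under a friend-graph walk: after each request the next user is a random friend of the current user with probability chance_of_selecting_friend, otherwise a random user.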
import requests
import random
import json
from tqdm import tqdm
import time
from utils import print_report
baseurl = "http://localhost:8000"

# Probability that the next request follows a friend link of the current user.
chance_of_selecting_friend = 0.7

# Ask the server for the user -> friends mapping and the list of all ids.
user_friends = json.loads(requests.get(f"{baseurl}/users_and_friends").content)
user_ids = json.loads(requests.get(f"{baseurl}/users").content)["ids"]

# Fixed seed so every run walks the same sequence of users.
random.seed(0)
def fetch_friend(prob):
    """Return True with probability ``prob``, using one ``random.random()`` draw."""
    draw = random.random()
    return draw < prob
def generate_random():
    """Return one uniformly chosen entry of ``user_ids`` as a string."""
    return str(random.choice(user_ids))
def generate_random_friend(user):
    """Return a random friend of ``user`` as a string.

    Args:
        user: Key into ``user_friends`` identifying the current user.

    Returns:
        A randomly chosen entry of the user's friend list. If the user is
        unknown or has no friends, a random entry of ``user_ids`` is
        returned instead, so the walk continues rather than crashing.
    """
    # `or` handles both a missing key and an empty friend list.
    candidates = user_friends.get(user) or user_ids
    return str(random.choice(candidates))
times = []
hits = []

start = time.time()
curr_user = generate_random()
for _ in tqdm(range(10000)):
    response = requests.get(f"{baseurl}/user/{curr_user}")
    content = json.loads(response.content)
    times.append(content["time_ms"])
    hits.append(content["source"] == "cache")
    # Next hop: a friend of the current user with the configured
    # probability, otherwise jump to a random user.
    curr_user = (
        generate_random_friend(curr_user)
        if fetch_friend(chance_of_selecting_friend)
        else generate_random()
    )
end = time.time()

print_report(hits, times, end - start)