Compare commits

...

31 Commits

Author SHA1 Message Date
Arthur Lu
db002dbdd0 update results for baseline, ideal, nocache, prefetch, and tiered
add results for readafterwrite

remove frequent_after_write and friend_based from varied workload
2025-03-04 07:17:30 +00:00
Derek Wang
9cd8331a9d Added test specifically for read_after_write 2025-03-03 22:50:49 -08:00
Derek Wang
583c8633f7 Added test specifically for read_after_write 2025-03-03 22:48:35 -08:00
Arthur Lu
d2da61fa86 remove frequent_after_write from unified test script 2025-03-04 06:30:14 +00:00
Arthur Lu
4197ffb0ea INVALIDclear! initial results 2025-03-04 06:29:06 +00:00
root
474d827de6 Merge branch 'main' of https://github.com/ltcptgeneral/cs239-caching 2025-03-04 06:24:40 +00:00
Arthur Lu
839e1b8bd7 remove print statements from sieve, update gitignore, minor fix to test utils 2025-03-04 06:24:13 +00:00
HiccupHan
31ffbc18ec Merge branch 'main' of https://github.com/ltcptgeneral/cs239-caching 2025-03-03 22:19:51 -08:00
HiccupHan
f737271303 fix for sampling issue 2025-03-03 22:19:49 -08:00
Derek Wang
32ac22806c Fixed POST requests in system 2025-03-03 19:29:55 -08:00
Arthur Lu
8430009f8c update order in varied workload 2025-03-03 22:50:23 +00:00
Arthur Lu
8cb333b305 fix weighted friend test naming 2025-03-03 22:30:43 +00:00
Arthur Lu
b28decfc91 fix sieve nameing 2025-03-03 22:29:45 +00:00
PurvaG1700
d57069856f [ADD]- Added end to end varied workloads 2025-03-02 23:49:26 -08:00
Derek Wang
72b72a949f Added ReadAfterWrite 2025-03-02 19:41:55 -08:00
Arthur Lu
35ea5a234f add no cache and ideal cache,
move printing cache reports to util file
2025-03-02 21:22:31 +00:00
Arthur Lu
50b5ea0acd update configs for new database size,
add simulated 10ms delay to database,
updated test script naming to better convey the experiment method
2025-03-01 00:35:51 +00:00
HiccupHan
3d40e4cc6f new data 2025-02-28 12:24:54 -08:00
HiccupHan
9699111c11 Merge branch 'main' of https://github.com/ltcptgeneral/cs239-caching 2025-02-28 12:15:18 -08:00
HiccupHan
ebf531f586 new data 2025-02-28 12:15:11 -08:00
root
6b082e0fea Merge branch 'main' of https://github.com/ltcptgeneral/cs239-caching 2025-02-28 20:12:33 +00:00
Arthur Lu
e5d8d3c0cf add basic data generation 2025-02-28 20:12:23 +00:00
HiccupHan
1c30154aa7 Merge branch 'main' of https://github.com/ltcptgeneral/cs239-caching 2025-02-28 12:03:46 -08:00
HiccupHan
33d9c0b716 added data 2025-02-28 12:03:38 -08:00
Arthur Lu
c3340e037d improve tiered cache with return to l1 mechanic 2025-02-28 19:21:53 +00:00
Arthur Lu
f7903f4fea add individual config files for each strategy,
config file path can be specified as the second argument
2025-02-28 19:17:44 +00:00
HiccupHan
e9b1128826 prefetch test 2025-02-25 13:02:48 -08:00
isha28-uclaCS
a276151e0c Seive Testing 2025-02-24 22:04:47 -08:00
HiccupHan
f475991f50 randomize friends further 2025-02-24 17:47:07 -08:00
Arthur Lu
9dd93d96f2 Merge pull request #1 from ltcptgeneral/metrics
add throughput and hit ratio metrics
2025-02-23 11:18:56 -08:00
HiccupHan
5dc9a99a3a Add to db instead of creating new one 2025-02-22 19:57:04 -08:00
35 changed files with 24996 additions and 81 deletions

.gitignore (2 changes)

@@ -7,6 +7,7 @@ __pycache__/
 # Ignore database files (TinyDB JSON)
 database.json
+temp_DB.json

 # Ignore environment variables file (if used)
 .env
@@ -22,4 +23,3 @@ database.json
 # Ignore MacOS system files
 .DS_Store
-

app/cache/eviction_sieve.py

@@ -8,7 +8,7 @@ class Node:
         self.next = None
         self.prev = None

-class SeiveCache(Cache):
+class SieveCache(Cache):
     def __init__(self, limit: int):
         super().__init__(limit)
         self.limit = limit # Fix: Store limit properly
@@ -18,13 +18,13 @@ class SeiveCache(Cache):
         self.hand = None

     def print_cache_state(self):
-        print("Current cache state:")
+        #print("Current cache state:")
         node = self.head
         if not node:
-            print("Cache is empty.")
+            #print("Cache is empty.")
            return
        for _ in range(len(self.cache)):
-            print(f"Key: {node.key}, Value: {node.value}, Visited: {node.visited}")
+            #print(f"Key: {node.key}, Value: {node.value}, Visited: {node.visited}")
            node = node.next
            if node == self.head:
                break
@@ -33,20 +33,17 @@ class SeiveCache(Cache):
         if key in self.cache:
             node = self.cache[key]
             node.visited = True
-            print(f"GET {key}: {node.value}")
-            self.print_cache_state()
+            #self.print_cache_state()
             return node.value
-        print(f"GET {key}: MISS")
         self.print_cache_state()
         return None

     def put(self, key: str, val: str) -> bool:
-        print(f"PUT {key}: {val}")
         if key in self.cache:
             node = self.cache[key]
             node.value = val
             node.visited = True
-            self.print_cache_state()
+            #self.print_cache_state()
             return False # No eviction needed

         new_node = Node(key, val)
@@ -66,11 +63,10 @@ class SeiveCache(Cache):
         self.cache[key] = new_node
         if not self.hand:
             self.hand = self.head
-        self.print_cache_state()
+        #self.print_cache_state()
         return False

     def invalidate(self, key: str) -> bool:
-        print(f"INVALIDATE {key}")
         if key in self.cache:
             node = self.cache.pop(key)
             if node == self.head:
@@ -81,16 +77,14 @@ class SeiveCache(Cache):
                 node.next.prev = node.prev
             if node.prev:
                 node.prev.next = node.next
-            self.print_cache_state()
+            #self.print_cache_state()
             return True
-        print("INVALIDATE FAILED: Key not found")
         return False

     def next_hand(self):
         self.hand = self.hand.next if self.hand.next else self.head

     def evict(self):
-        print("EVICTION START")
         while self.hand.visited:
             self.hand.visited = False
             self.next_hand()
@@ -107,8 +101,7 @@ class SeiveCache(Cache):
         obj_to_evict.prev.next = obj_to_evict.next
         del self.cache[obj_to_evict.key]
-        print(f"EVICTED {obj_to_evict.key}")
-        self.print_cache_state()
+        #self.print_cache_state()

 # Basic API demo for future testing
 if __name__ == "__main__":

app/cache/idealcache.py (new file, 16 lines)

@@ -0,0 +1,16 @@
from .cache import Cache
from database import get_user_profile
class IdealCache(Cache):
def __init__(self, limit: int):
pass
def get(self, key):
return get_user_profile(key)
def put(self, key, val):
return False
def invalidate(self, key):
return False

app/cache/nocache.py (new file, 15 lines)

@@ -0,0 +1,15 @@
from .cache import Cache
class NoCache(Cache):
def __init__(self, limit: int):
pass
def get(self, key):
return None
def put(self, key, val):
return False
def invalidate(self, key):
return False

app/cache/prefetch_cache.py

@@ -1,11 +1,29 @@
-from .cache import BaselineCache
+from .cache import Cache
+from database import get_user_profile, get_friends
+from collections import OrderedDict
+import math

-class PrefetchCache(BaselineCache):
-    key_relations = None
+class PrefetchCache(Cache):
+    limit = None
+    cache = None

-    def __init__(self):
+    def __init__(self, limit):
         super()
-        self.key_relations = dict()
+        self.limit = limit
+        self.cache = OrderedDict()
+
+    def __eq__(self, other):
+        return self.cache == other
+
+    def __len__(self):
+        return len(self.cache)
+
+    def get(self, key: str) -> str:
+        if key in self.cache:
+            self.cache.move_to_end(key)
+            return self.cache[key]
+        else:
+            return None

     def put(self, key: str, val: str) -> bool:
         # LRU evict
@@ -14,16 +32,26 @@ class PrefetchCache(BaselineCache):
             self.cache.popitem(last = False)
             evict = True
         self.cache[key] = val
-        self.prefetch(key, val)
+        if self.prefetch(val):
+            evict = True
         return evict

-    def prefetch(self, key: str, val: str) -> bool:
-        if len(self.cache) >= self.limit and key in self.key_relations:
-            self.cache[self.key_relations[key][0]] = self.key_relations[key][1]
-            return True
-        return False
+    def prefetch(self, profile) -> bool:
+        evict = False
+        friends_prof = get_friends(profile["user_id"], math.ceil(self.limit*0.1))
+        for i in friends_prof:
+            if len(self.cache) >= self.limit:
+                self.cache.popitem(last = False)
+                evict = True
+            self.cache[i] = friends_prof[i]
+        return evict

-    def set_relations(self, key: str, related_key: str, related_val: str):
-        self.key_relations[key] = related_key | related_val
-        return
+    def invalidate(self, key: str) -> bool:
+        # basic delete invalidation, no (p)refetching
+        if key in self.cache:
+            del self.cache[key]
+            return True
+        else:
+            return False

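For orientation, a minimal usage sketch of the reworked PrefetchCache (an illustration, not code from the repo; it assumes the app package is importable the same way app/main.py imports it, with a config path in sys.argv):

# Hedged sketch: put() caches the profile and also prefetches roughly
# ceil(limit * 0.1) of that user's friends via database.get_friends().
from cache.prefetch_cache import PrefetchCache
from database import get_user_profile

cache = PrefetchCache(limit=50)
profile = get_user_profile("1")      # assumes user "1" exists in the datastore
cache.put("1", profile)              # stores "1" and prefetches some friends
print(len(cache))                    # > 1 whenever user "1" has friends
print(cache.get("1") is not None)    # True; get() also refreshes LRU order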
app/cache/read_after_write_cache.py (new file, 20 lines)

@@ -0,0 +1,20 @@
from .cache import BaselineCache
from database import get_user_profile
class ReadAfterWriteCache(BaselineCache):
def __init__(self, limit):
super().__init__( limit )
def invalidate(self, key: str) -> bool:
# basic delete invalidation, but after writing, we immediately read the value and add it to the cache
invalidated = False
if key in self.cache:
del self.cache[key]
invalidated = True
newData = get_user_profile( key )
self.put( key, newData )
return invalidated

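A short hedged sketch of the intended read-after-write behavior (illustration only; assumes the same import context as app/main.py and that user "1" exists in the database):

# Hedged sketch: invalidate() drops the stale entry, then immediately re-reads
# the fresh profile from the database and re-inserts it into the cache.
from cache.read_after_write_cache import ReadAfterWriteCache

cache = ReadAfterWriteCache(limit=50)
cache.put("1", {"user_id": "1", "name": "stale"})
cache.invalidate("1")                # delete, then re-cache get_user_profile("1")
print(cache.get("1"))                # fresh profile served from the cache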
app/cache/tiered_cache.py

@@ -21,6 +21,11 @@ class TieredCache(BaselineCache):
             f = open(self.l2_map[key], "r")
             v = f.read()
             f.close()
+            # we will also preemptively return the value from l1 to l2:
+            del self.l2_map[key]
+            self.put(key, v)
             return v
         else: # otherwise its a cache miss and return None
             return None

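Based on the hunk above and the commit message ("improve tiered cache with return to l1 mechanic"), the new branch promotes an entry found in the on-disk L2 store back into the in-memory L1 cache on a hit; a hedged sketch of that get() path, with the surrounding class details assumed rather than shown in this diff:

# Hedged sketch (assumed context: self.cache is the in-memory L1 store and
# self.l2_map maps keys to spill files, as implied by the hunk above).
def get(self, key):
    if key in self.cache:                 # L1 hit
        return self.cache[key]
    if key in self.l2_map:                # L2 hit: read the spilled value ...
        with open(self.l2_map[key], "r") as f:
            v = f.read()
        del self.l2_map[key]              # ... and move the entry back up to L1
        self.put(key, v)
        return v
    return None                           # miss in both tiers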
app/config.py

@@ -1,7 +1,8 @@
 import os
 import yaml
+import sys

-CONFIG_FILE = "config.yaml"
+CONFIG_FILE = sys.argv[1]

 def load_config():
     with open(CONFIG_FILE, "r") as f:

app/config.yaml (deleted)

@@ -1,4 +0,0 @@
cache_strategy: "Tiered" # Change this to "Prefetch" or "Tiered" or "Seive"
cache_limit: 10
l2_cache_limit: 100
db_file: "llmData_sns.json" # Change this to the name of any json file within the "database/datastore" folder

app/config_baseline.yaml (new file, 4 lines)

@@ -0,0 +1,4 @@
cache_strategy: "Baseline"
cache_limit: 50
l2_cache_limit: 100 # unused
db_file: "llmData_sns.json" # Change this to the name of any json file within the "database/datastore" folder

app/config_ideal.yaml (new file, 4 lines)

@@ -0,0 +1,4 @@
cache_strategy: "Ideal"
cache_limit: 50
l2_cache_limit: 100 # unused
db_file: "llmData_sns.json" # Change this to the name of any json file within the "database/datastore" folder

app/config_nocache.yaml (new file, 4 lines)

@@ -0,0 +1,4 @@
cache_strategy: "None"
cache_limit: 50
l2_cache_limit: 100 # unused
db_file: "llmData_sns.json" # Change this to the name of any json file within the "database/datastore" folder

app/config_prefetch.yaml (new file, 4 lines)

@@ -0,0 +1,4 @@
cache_strategy: "Prefetch"
cache_limit: 50
l2_cache_limit: 100 # unused
db_file: "llmData_sns.json" # Change this to the name of any json file within the "database/datastore" folder

View File

@@ -0,0 +1,4 @@
cache_strategy: "ReadAfterWrite"
cache_limit: 50
l2_cache_limit: 100 # unused
db_file: "llmData_sns.json" # Change this to the name of any json file within the "database/datastore" folder

app/config_sieve.yaml (new file, 4 lines)

@@ -0,0 +1,4 @@
cache_strategy: "Sieve"
cache_limit: 50
l2_cache_limit: 100 # unused
db_file: "llmData_sns.json" # Change this to the name of any json file within the "database/datastore" folder

app/config_tiered.yaml (new file, 4 lines)

@@ -0,0 +1,4 @@
cache_strategy: "Tiered"
cache_limit: 25
l2_cache_limit: 175
db_file: "llmData_sns.json" # Change this to the name of any json file within the "database/datastore" folder

app/database.py

@@ -1,24 +1,53 @@
 from tinydb import TinyDB, Query
 from config import DB_FILE
+import shutil
+import random

 DB_LOCATION = "database/datastore/" + DB_FILE

 # Initialize TinyDB as a NoSQL key-value store
-db = TinyDB(DB_LOCATION)
+# We don't want to change our main DB file, so we will make a temp DB file and use that as our DB file
+shutil.copyfile( DB_LOCATION, "temp_DB.json" )
+db = TinyDB("temp_DB.json")
 User = Query()

 def get_user_ids():
     return [x["user_id"] for x in db.all()]

+def get_user_friend_ids():
+    user_friends = {}
+    for x in db.all():
+        user_friends[x["user_id"]] = x["friends"]
+    return user_friends
+
+def get_friends(user_id, num_friends):
+    friends = {}
+    curr_user = db.search(User.user_id == user_id)
+    random.seed(0)
+    if not curr_user:
+        return {}
+    sample_size = min(num_friends, len(curr_user[0]["friends"]))
+    for f in random.sample(curr_user[0]["friends"], sample_size):
+        friends[f] = db.search(User.user_id == user_id)[0]
+    return friends
+
 def get_user_profile(user_id):
     """Fetch user profile from TinyDB"""
     result = db.search(User.user_id == user_id)
     return result[0] if result else None

-def update_user_profile(user_id, name, followers, bio, posts, friends):
+def update_user_profile( data ):
     """Update user profile in TinyDB"""
-    db.upsert({"user_id": user_id, "name": name, "followers": followers, "bio": bio, "posts": posts, "friends": friends}, User.user_id == user_id)
+    user_id = str( data["user_id"] )
+    # Basically make sure friends stay the same (for prefetching). Not great implementation, but it works
+    curr_user = db.search(User.user_id == user_id)
+    if( curr_user and data["friends"] == None ):
+        data["friends"] = curr_user[0]["friends"]
+    db.upsert( data, User.user_id == user_id )

 def init_db():
     """Ensure TinyDB is initialized before FastAPI starts and prepopulate some data"""
@@ -28,7 +57,7 @@ def init_db():
     # Prepopulate database with some sample users if empty
     if len(db) == 0:
         db.insert_multiple([
-            {"user_id": "1", "name": "Alice", "followers": 100, "bio": "Love coding!", "posts": "Hello, world!"},
-            {"user_id": "2", "name": "Bob", "followers": 200, "bio": "Tech enthusiast", "posts": "AI is amazing!"},
-            {"user_id": "3", "name": "Charlie", "followers": 50, "bio": "Blogger", "posts": "Check out my latest post!"}
+            {"user_id": "1", "name": "Alice", "followers": 100, "bio": "Love coding!", "posts": "Hello, world!", "friends": ["2"]},
+            {"user_id": "2", "name": "Bob", "followers": 200, "bio": "Tech enthusiast", "posts": "AI is amazing!","friends": ["3", "1"]},
+            {"user_id": "3", "name": "Charlie", "followers": 50, "bio": "Blogger", "posts": "Check out my latest post!", "friends": ["1"]}
         ])

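A hedged example of the friend-preservation behavior added to update_user_profile (illustration only; the values reuse the sample users seeded by init_db above, and the import context is assumed to match app/main.py):

# Hedged sketch: passing friends=None keeps the user's existing friends list,
# so prefetching still works after profile updates.
from database import update_user_profile, get_user_profile

update_user_profile({"user_id": "1", "name": "Alice", "followers": 150,
                     "bio": "New bio", "posts": "New post", "friends": None})
print(get_user_profile("1")["friends"])   # still ["2"] from the seeded data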
File diff suppressed because it is too large.

View File

@@ -6,6 +6,10 @@ warnings.filterwarnings('ignore')
 import re
 import random
 import json
+from tinydb import TinyDB
+from tinydb.storages import JSONStorage
+from tinydb.middlewares import CachingMiddleware
+import math

 HUGGINGFACEHUB_API_TOKEN = None
 os.environ["HUGGINGFACEHUB_API_TOKEN"] = HUGGINGFACEHUB_API_TOKEN
@@ -21,13 +25,11 @@ def parse_profile(text, user_id, num_users):
     match = re.search(r"([A-Za-z ]+)\|([A-Za-z &\-!]+)\|([A-Za-z .',!?&\-]+)", text)
     name, bio, posts = match.groups()

-    # Generate mock followers count (randomized for realism)
     followers = random.randint(10, 5000)

-    # Generate mock friends (users with nearby IDs)
-    friend_ids = [str(fid) for fid in range(num_users) if fid != user_id]
+    friend_ids = [str(fid) for fid in range(user_id) if fid != user_id]
     random.shuffle(friend_ids)
-    friends = friend_ids[:random.randint(1, num_users-1)] # Each user gets 1-5 friends
+    friends = friend_ids[:random.randint(1, min(100, math.ceil(num_users/3)))]

     return {
         "user_id": str(user_id),
@@ -38,7 +40,7 @@ def parse_profile(text, user_id, num_users):
         "friends": friends
     }

-def generate_data(num_users):
+def generate_data(base_id, num_users):
     system_message = """You are a data generator creating user profiles for a social media app.
     Always provide user profiles in this format: Name | Interest | Recent Activity.
     Do not include numbers, IDs, or assistant labels. Only return a properly formatted response.
@@ -60,25 +62,30 @@ def generate_data(num_users):
         huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
     )
     llm_chain = prompt | llm

-    data = {}
-    i = 0
+    data = []
+    i = base_id
     user_id = 0
     while user_id < num_users:
         raw_text = llm_chain.invoke({"user_id": i})
         while not valid_data(raw_text):
             i = i + 1
             raw_text = llm_chain.invoke({"user_id": i})
-        user_profile = parse_profile(raw_text, user_id, num_users)
+        user_profile = parse_profile(raw_text, base_id + user_id, num_users)
         user_id = user_id + 1
-        data[user_id] = user_profile
+        i = i + 1
+        data.append(user_profile)
     return data

 if __name__ == "__main__":
-    data = generate_data(100)
+    base_id = input("Enter base id (check db to find the next consecutive user_id): ")
+    num_users = input("Enter number of users to generate: ")
+    data = generate_data(int(base_id), int(num_users))

     # Create json file
-    json_object = json.dumps( {"_default": data}, indent=4 )
-    with open( "datastore/llmData_sns.json", "w" ) as f:
-        f.write( json_object )
+    file_path = "datastore/llmData_sns.json"
+    global db
+    db = TinyDB(file_path, storage=CachingMiddleware(JSONStorage), indent=4)
+    db.insert_multiple(data)
+    db.close()

View File

@@ -0,0 +1,33 @@
import random, string
from tinydb import TinyDB
from tinydb.storages import JSONStorage
from tinydb.middlewares import CachingMiddleware
def randomstring(length):
letters = string.ascii_lowercase
return ''.join(random.choice(letters) for i in range(length))
def generate_data(base_id, num_users):
data = []
for i in range(num_users):
data.append({
"user_id": str(base_id + i),
"name": randomstring(10),
"followers": random.randint(1, 10000),
"bio": randomstring(250),
"posts": randomstring(100),
"friends": random.choices(range(base_id + i - 1), k=random.randint(1, 50))
})
return data
base_id = input("Enter base id (check db to find the next consecutive user_id): ")
num_users = input("Enter number of users to generate: ")
data = generate_data(int(base_id), int(num_users))
# Create json file
file_path = "datastore/llmData_sns.json"
global db
db = TinyDB(file_path, storage=CachingMiddleware(JSONStorage), indent=4)
db.insert_multiple(data)
db.close()

app/main.py

@@ -1,23 +1,40 @@
 from fastapi import FastAPI, HTTPException
-from database import get_user_ids, get_user_profile, update_user_profile
+from database import get_user_ids, get_user_profile, update_user_profile, get_user_friend_ids
 from cache.cache import BaselineCache
 from cache.prefetch_cache import PrefetchCache
 from cache.tiered_cache import TieredCache
-from cache.eviction_seive import SeiveCache
+from cache.eviction_sieve import SieveCache
+from cache.nocache import NoCache
+from cache.idealcache import IdealCache
+from cache.read_after_write_cache import ReadAfterWriteCache
 from config import CACHE_STRATEGY, CACHE_LIMIT, L2_CACHE_LIMIT
+from models.models import User
 import time

 app = FastAPI()

 # Initialize cache based on strategy from config.yaml or environment variable
 if CACHE_STRATEGY == "Baseline":
+    print("Using baseline cache strategy")
     cache = BaselineCache(limit=CACHE_LIMIT)
 elif CACHE_STRATEGY == "Prefetch":
+    print("Using prefetch cache strategy")
     cache = PrefetchCache(limit=CACHE_LIMIT)
 elif CACHE_STRATEGY == "Tiered":
+    print("Using tiered cache strategy")
     cache = TieredCache(limit=CACHE_LIMIT, l2_limit=L2_CACHE_LIMIT)
-elif CACHE_STRATEGY == "Seive":
-    cache = SeiveCache(limit=CACHE_LIMIT)
+elif CACHE_STRATEGY == "Sieve":
+    print("Using sieve cache strategy")
+    cache = SieveCache(limit=CACHE_LIMIT)
+elif CACHE_STRATEGY == "None":
+    print("Using no cache strategy")
+    cache = NoCache(limit=CACHE_LIMIT)
+elif CACHE_STRATEGY == "Ideal":
+    print("Using ideal cache strategy")
+    cache = IdealCache(limit=CACHE_LIMIT)
+elif CACHE_STRATEGY == "ReadAfterWrite":
+    print("Using read-after-write cache strategy")
+    cache = ReadAfterWriteCache(limit=CACHE_LIMIT)
 else:
     raise ValueError(f"Invalid CACHE_STRATEGY: {CACHE_STRATEGY}")
@@ -25,6 +42,10 @@ else:
 def fetch_user_ids():
     return {"ids": get_user_ids()}

+@app.get("/users_and_friends")
+def fetch_user_and_friends():
+    return get_user_friend_ids()
+
 @app.get("/user/{user_id}")
 def fetch_user_profile(user_id: str):
     """Fetch user profile with caching"""
@@ -34,6 +55,7 @@ def fetch_user_profile(user_id: str):
         return {"user_id": user_id, "profile": cached_profile, "source": "cache", "time_ms": (time.time() - start) * 1000}

     profile = get_user_profile(user_id)
+    time.sleep(10 / 1000) # simulate 10 ms db delay, we do this here instead of the actual db in the ideal cache case
     if profile is None:
         raise HTTPException(status_code=404, detail="User not found")
@@ -41,8 +63,11 @@ def fetch_user_profile(user_id: str):
     return {"user_id": user_id, "profile": profile, "source": "database", "time_ms": (time.time() - start) * 1000}

 @app.post("/update_user/")
-def modify_user_profile(user_id: str, name: str, followers: int, bio: str, posts: str, friends: list[str]):
+async def modify_user_profile(user_data : User):
     """Update user profile and refresh cache"""
-    update_user_profile(user_id, name, followers, bio, posts, friends)
+    user_id=user_data.user_id
+    user_dict = user_data.dict()
+    update_user_profile(user_dict)
     cache.invalidate(user_id) # Invalidate old cache
     return {"message": "User profile updated successfully"}

app/models/models.py (new file, 9 lines)

@@ -0,0 +1,9 @@
from pydantic import BaseModel
class User(BaseModel):
user_id: str
name: str | None = None
followers: int | None = None
bio: str | None = None
posts: str | None = None
friends: list | None = None

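The new Pydantic model is the JSON body that the reworked POST /update_user/ endpoint in app/main.py now expects; a hedged example mirroring the write objects used by the test scripts below (illustrative values, server assumed to be running locally):

# Hedged sketch: a write request matching the User model.
import requests

write_obj = {"user_id": "1", "name": "Test", "followers": 100,
             "bio": "Updated", "posts": "Updated", "friends": None}
response = requests.post("http://localhost:8000/update_user/", json=write_obj)
print(response.json())    # {"message": "User profile updated successfully"}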
requirements.txt

@@ -3,3 +3,4 @@ uvicorn
 tinydb
 pyyaml
 requests
+tqdm

View File

@@ -0,0 +1,71 @@
import requests
import random
import json
from tqdm import tqdm
import time
from collections import deque
from utils import print_report
baseurl = "http://localhost:8000"
endpoints = {
"/user/{user_id}": 0.5, # 50% read operations
"/update_user/?user_id={user_id}&name=Test&followers=100&bio=Updated&posts=Updated": 0.5 # 50% write operations
}
# Fetch all user IDs
user_ids = json.loads(requests.get(baseurl + "/users").content)["ids"]
random.seed(0)
prev_updated_users = deque()
def generate_random():
"""Randomly generate a read or write request, favoring cache hits."""
endpoint = random.choices(list(endpoints.keys()), list(endpoints.values()))[0]
# Reads
if endpoint == "/user/{user_id}":
# Favor frequently accessed user IDs to increase hit ratio
if( prev_updated_users ):
random_user = str(random.choice(prev_updated_users)) if random.random() < 0.7 else str(random.choice(user_ids))
else:
random_user = str(random.choice(user_ids))
return baseurl + endpoint.replace("{user_id}", random_user)
# Writes
else:
random_user = str(random.choice(user_ids))
prev_updated_users.append( random_user )
if( len( prev_updated_users ) > 10 ):
prev_updated_users.popleft()
return random_user
times = []
hits = []
start = time.time()
for i in tqdm(range(10000)):
url = generate_random()
if( "user" not in url ):
write_obj = { "user_id":url,"name": "Test", "followers":"100","bio":"updated","posts":"updated"}
response = requests.post("http://localhost:8000/update_user/", json = write_obj)
else:
response = requests.get(url)
try:
content = json.loads(response.content)
if "time_ms" in content: # Only process if "time_ms" exists
times.append(content["time_ms"])
hits.append(content["source"] == "cache")
except json.JSONDecodeError:
print(f"Error decoding JSON: {response.content}")
exit(1)
except KeyError:
print(f"Unexpected response format: {content}")
exit(1)
end = time.time()
print(f"\n--- Results ---")
print_report(hits, times, end - start)

View File

@@ -5,6 +5,7 @@ import random
 import json
 from tqdm import tqdm
 import time
+from utils import print_report

 baseurl = "http://localhost:8000"
@@ -36,21 +37,4 @@ for i in tqdm(range(10000)):
         hits.append(content["source"] == "cache")

 end = time.time()

-hits_count = sum(hits)
-miss_count = len(hits) - hits_count
-hits_time = 0
-miss_time = 0
-for i in range(len(times)):
-    if hits[i]:
-        hits_time += times[i]
-    else:
-        miss_time += times[i]
-total_time = hits_time + miss_time
-print(f"hits: {hits_count} misses: {miss_count} ratio: { hits_count / (hits_count + miss_count)}")
-print(f"average response time (ms) : {total_time / len(times)}")
-print(f"average cache hit response time (ms) : {hits_time / hits_count}")
-print(f"average cache miss response time (ms): {miss_time / miss_count}")
-print(f"cache throughput (requests / ms) : { len(times) / total_time}")
-print(f"real throughput (requests / ms) : { len(times) / (end - start) / 1000}")
+print_report(hits, times, end - start)

tests/results_baseline (new file, 48 lines)

@@ -0,0 +1,48 @@
--- random_read Results ---
hits: 507 misses: 9493 ratio: 0.0507
average response time (ms) : 12.171702599525451
average cache hit response time (ms) : 0.0013355202458549063
average cache miss response time (ms): 12.821694815810583
cache throughput (requests / s) : 82.15777470926605
real throughput (requests / s) : 75.03700407782604
--- read_heavy Results ---
hits: 394 misses: 7631 ratio: 0.04909657320872274
average response time (ms) : 12.201453324790313
average cache hit response time (ms) : 0.0013572915556467125
average cache miss response time (ms): 12.831362620701
cache throughput (requests / s) : 81.95744993493923
real throughput (requests / s) : 73.58096764800857
--- write_heavy Results ---
hits: 112 misses: 1906 ratio: 0.05550049554013875
average response time (ms) : 12.256505940666992
average cache hit response time (ms) : 0.0013560056686401367
average cache miss response time (ms): 12.976640669271303
cache throughput (requests / s) : 81.58932120140436
real throughput (requests / s) : 58.17744310065741
--- frequent_users Results ---
hits: 7096 misses: 2904 ratio: 0.7096
average response time (ms) : 3.7240190267562867
average cache hit response time (ms) : 0.000715490930378773
average cache miss response time (ms): 12.822008658374966
cache throughput (requests / s) : 268.52709205168185
real throughput (requests / s) : 211.60014253369587
--- frequent_after_write Results ---
hits: 1068 misses: 3862 ratio: 0.21663286004056795
average response time (ms) : 9.787649653746197
average cache hit response time (ms) : 0.0015231553981366675
average cache miss response time (ms): 12.493911461160938
cache throughput (requests / s) : 102.16957445113012
real throughput (requests / s) : 48.726267135940624
--- weighted_friend_readonly Results ---
hits: 1275 misses: 8725 ratio: 0.1275
average response time (ms) : 11.204515027999879
average cache hit response time (ms) : 0.0013659982120289521
average cache miss response time (ms): 12.841651419172313
cache throughput (requests / s) : 89.249735263063
real throughput (requests / s) : 80.8252530462673

tests/results_ideal (new file, 48 lines)

@@ -0,0 +1,48 @@
--- random_read Results ---
hits: 10000 misses: 0 ratio: 1.0
average response time (ms) : 2.7374683618545532
average cache hit response time (ms) : 2.7374683618545532
average cache miss response time (ms): N/A
cache throughput (requests / s) : 365.3010255514076
real throughput (requests / s) : 264.62084114988136
--- read_heavy Results ---
hits: 8025 misses: 0 ratio: 1.0
average response time (ms) : 2.3531507034539434
average cache hit response time (ms) : 2.3531507034539434
average cache miss response time (ms): N/A
cache throughput (requests / s) : 424.96215755846185
real throughput (requests / s) : 174.59831308637462
--- write_heavy Results ---
hits: 2018 misses: 0 ratio: 1.0
average response time (ms) : 2.5427952275876367
average cache hit response time (ms) : 2.5427952275876367
average cache miss response time (ms): N/A
cache throughput (requests / s) : 393.26800253149185
real throughput (requests / s) : 24.773972578001025
--- frequent_users Results ---
hits: 10000 misses: 0 ratio: 1.0
average response time (ms) : 1.3468331575393677
average cache hit response time (ms) : 1.3468331575393677
average cache miss response time (ms): N/A
cache throughput (requests / s) : 742.4824629555278
real throughput (requests / s) : 424.7398305196224
--- frequent_after_write Results ---
hits: 4930 misses: 0 ratio: 1.0
average response time (ms) : 2.266260528177567
average cache hit response time (ms) : 2.266260528177567
average cache miss response time (ms): N/A
cache throughput (requests / s) : 441.25553420116205
real throughput (requests / s) : 77.73575713810418
--- weighted_friend_readonly Results ---
hits: 10000 misses: 0 ratio: 1.0
average response time (ms) : 2.7157030582427977
average cache hit response time (ms) : 2.7157030582427977
average cache miss response time (ms): N/A
cache throughput (requests / s) : 368.228771170237
real throughput (requests / s) : 267.4051580450448

tests/results_nocache (new file, 48 lines)

@@ -0,0 +1,48 @@
--- random_read Results ---
hits: 0 misses: 10000 ratio: 0.0
average response time (ms) : 12.805271649360657
average cache hit response time (ms) : N/A
average cache miss response time (ms): 12.805271649360657
cache throughput (requests / s) : 78.09283765174385
real throughput (requests / s) : 71.53643320734811
--- read_heavy Results ---
hits: 0 misses: 8025 ratio: 0.0
average response time (ms) : 12.849865197392639
average cache hit response time (ms) : N/A
average cache miss response time (ms): 12.849865197392639
cache throughput (requests / s) : 77.82182806111534
real throughput (requests / s) : 70.04929255901189
--- write_heavy Results ---
hits: 0 misses: 2018 ratio: 0.0
average response time (ms) : 12.95277124826452
average cache hit response time (ms) : N/A
average cache miss response time (ms): 12.95277124826452
cache throughput (requests / s) : 77.20355596752974
real throughput (requests / s) : 55.53823196963498
--- frequent_users Results ---
hits: 0 misses: 10000 ratio: 0.0
average response time (ms) : 11.695257019996642
average cache hit response time (ms) : N/A
average cache miss response time (ms): 11.695257019996642
cache throughput (requests / s) : 85.50474763318087
real throughput (requests / s) : 77.81244515584922
--- frequent_after_write Results ---
hits: 0 misses: 4930 ratio: 0.0
average response time (ms) : 12.357432702008415
average cache hit response time (ms) : N/A
average cache miss response time (ms): 12.357432702008415
cache throughput (requests / s) : 80.92295739045157
real throughput (requests / s) : 43.34797667618127
--- weighted_friend_readonly Results ---
hits: 0 misses: 10000 ratio: 0.0
average response time (ms) : 12.757544946670532
average cache hit response time (ms) : N/A
average cache miss response time (ms): 12.757544946670532
cache throughput (requests / s) : 78.38498740786174
real throughput (requests / s) : 71.68974303163175

tests/results_prefetch (new file, 48 lines)

@@ -0,0 +1,48 @@
--- random_read Results ---
hits: 499 misses: 9501 ratio: 0.0499
average response time (ms) : 12.209891486167908
average cache hit response time (ms) : 0.0013277860347159162
average cache miss response time (ms): 12.851094863324677
cache throughput (requests / s) : 81.90080977647176
real throughput (requests / s) : 74.83126189192693
--- read_heavy Results ---
hits: 405 misses: 7620 ratio: 0.05046728971962617
average response time (ms) : 11.846880363155377
average cache hit response time (ms) : 0.0013816503830897955
average cache miss response time (ms): 12.476463956157053
cache throughput (requests / s) : 84.41040757954049
real throughput (requests / s) : 65.28853446272649
--- write_heavy Results ---
hits: 95 misses: 1923 ratio: 0.04707631318136769
average response time (ms) : 12.044375786341808
average cache hit response time (ms) : 0.0017768458316200658
average cache miss response time (ms): 12.639303971130403
cache throughput (requests / s) : 83.02630354110914
real throughput (requests / s) : 20.054516595218445
--- frequent_users Results ---
hits: 5788 misses: 4212 ratio: 0.5788
average response time (ms) : 5.223854732513428
average cache hit response time (ms) : 0.0010156268981216525
average cache miss response time (ms): 12.400918536716038
cache throughput (requests / s) : 191.42951923528236
real throughput (requests / s) : 159.80528851176524
--- frequent_after_write Results ---
hits: 1047 misses: 3883 ratio: 0.2123732251521298
average response time (ms) : 9.824660583635373
average cache hit response time (ms) : 0.0011850337016662189
average cache miss response time (ms): 12.47343186892525
cache throughput (requests / s) : 101.78468675708436
real throughput (requests / s) : 48.92688928507726
--- weighted_friend_readonly Results ---
hits: 2838 misses: 7162 ratio: 0.2838
average response time (ms) : 9.23909306526184
average cache hit response time (ms) : 0.0012533357229091652
average cache miss response time (ms): 12.899661223937
cache throughput (requests / s) : 108.23573189882784
real throughput (requests / s) : 96.54757454253809

tests/results_readafterwrite (new file, 48 lines)

@@ -0,0 +1,48 @@
--- random_read Results ---
hits: 505 misses: 9495 ratio: 0.0505
average response time (ms) : 12.217395067214966
average cache hit response time (ms) : 0.0012926536031288675
average cache miss response time (ms): 12.867119313541872
cache throughput (requests / s) : 81.85050859847135
real throughput (requests / s) : 74.74830604127537
--- read_heavy Results ---
hits: 394 misses: 7631 ratio: 0.04909657320872274
average response time (ms) : 12.254692267777392
average cache hit response time (ms) : 0.001333086623758229
average cache miss response time (ms): 12.887351620073884
cache throughput (requests / s) : 81.6013962773598
real throughput (requests / s) : 73.22107615138934
--- write_heavy Results ---
hits: 112 misses: 1906 ratio: 0.05550049554013875
average response time (ms) : 12.277225436729529
average cache hit response time (ms) : 0.001330460820879255
average cache miss response time (ms): 12.998579181378936
cache throughput (requests / s) : 81.45162806967119
real throughput (requests / s) : 57.99762893791816
--- frequent_users Results ---
hits: 7096 misses: 2904 ratio: 0.7096
average response time (ms) : 3.7444978952407837
average cache hit response time (ms) : 0.0007732812285826467
average cache miss response time (ms): 12.8923869658436
cache throughput (requests / s) : 267.05850236182243
real throughput (requests / s) : 210.51762371076964
--- frequent_after_write Results ---
hits: 3509 misses: 1421 ratio: 0.711764705882353
average response time (ms) : 3.600564883156674
average cache hit response time (ms) : 0.0013591693520511987
average cache miss response time (ms): 12.488399400919109
cache throughput (requests / s) : 277.7341979526512
real throughput (requests / s) : 60.02808337161936
--- weighted_friend_readonly Results ---
hits: 1275 misses: 8725 ratio: 0.1275
average response time (ms) : 11.231861686706543
average cache hit response time (ms) : 0.0015234479717179841
average cache miss response time (ms): 12.872971286063208
cache throughput (requests / s) : 89.03243539613284
real throughput (requests / s) : 80.69348254832964

tests/results_tiered (new file, 48 lines)

@@ -0,0 +1,48 @@
--- random_read Results ---
hits: 1966 misses: 8034 ratio: 0.1966
average response time (ms) : 10.438107824325561
average cache hit response time (ms) : 0.09135385010759076
average cache miss response time (ms): 12.97006180905453
cache throughput (requests / s) : 95.80280418923658
real throughput (requests / s) : 86.41953468377949
--- read_heavy Results ---
hits: 1563 misses: 6462 ratio: 0.19476635514018692
average response time (ms) : 10.474605560302734
average cache hit response time (ms) : 0.0937643984686619
average cache miss response time (ms): 12.98547754048637
cache throughput (requests / s) : 95.46898871207694
real throughput (requests / s) : 84.39105440656928
--- write_heavy Results ---
hits: 415 misses: 1603 ratio: 0.20564915758176414
average response time (ms) : 10.430269364204822
average cache hit response time (ms) : 0.0991062945630177
average cache miss response time (ms): 13.10489985322625
cache throughput (requests / s) : 95.87480103168338
real throughput (requests / s) : 64.8966729031306
--- frequent_users Results ---
hits: 7557 misses: 2443 ratio: 0.7557
average response time (ms) : 3.1802247285842897
average cache hit response time (ms) : 0.011482802872243217
average cache miss response time (ms): 12.982182457854012
cache throughput (requests / s) : 314.44318730430115
real throughput (requests / s) : 240.3280431399857
--- frequent_after_write Results ---
hits: 1284 misses: 3646 ratio: 0.26044624746450307
average response time (ms) : 9.321854902812966
average cache hit response time (ms) : 0.022931448024381358
average cache miss response time (ms): 12.596626629622769
cache throughput (requests / s) : 107.27478709180934
real throughput (requests / s) : 49.79781897651969
--- weighted_friend_readonly Results ---
hits: 4471 misses: 5529 ratio: 0.4471
average response time (ms) : 7.232451653480529
average cache hit response time (ms) : 0.0870680014470151
average cache miss response time (ms): 13.01053273654109
cache throughput (requests / s) : 138.26570130182094
real throughput (requests / s) : 120.26334295113243

tests/utils.py (new file, 26 lines)

@@ -0,0 +1,26 @@
def print_report(hits, request_times, real_time):
hits_count = sum(hits)
miss_count = len(hits) - hits_count
hits_time = 0
miss_time = 0
for i in range(len(request_times)):
if hits[i]:
hits_time += request_times[i]
else:
miss_time += request_times[i]
total_time = hits_time + miss_time
print(f"hits: {hits_count} misses: {miss_count} ratio: { hits_count / (hits_count + miss_count)}")
print(f"average response time (ms) : {total_time / len(request_times)}")
if hits_count > 0:
print(f"average cache hit response time (ms) : {hits_time / hits_count}")
else :
print(f"average cache hit response time (ms) : N/A")
if miss_count > 0:
print(f"average cache miss response time (ms): {miss_time / miss_count}")
else:
print(f"average cache miss response time (ms): N/A")
print(f"cache throughput (requests / s) : { len(request_times) / total_time * 1000}")
print(f"real throughput (requests / s) : { len(request_times) / (real_time)}")

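A hedged usage sketch of the shared reporting helper above (made-up numbers; the real test scripts pass per-request latencies and hit flags collected from the API's time_ms and source fields):

# Hedged sketch: hits is a list of booleans (cache hit or not), request_times
# holds per-request latencies in ms, real_time is wall-clock seconds.
from utils import print_report

hits = [True, False, True]
request_times = [0.5, 12.8, 0.4]
print_report(hits, request_times, 2.0)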
tests/varied_workload.py (new file, 82 lines)

@@ -0,0 +1,82 @@
import requests
import random
import json
import time
from tqdm import tqdm
from utils import print_report
baseurl = "http://localhost:8000"
# Fetch all user IDs and friends list
user_ids = json.loads(requests.get(baseurl + "/users").content)["ids"]
user_friends = json.loads(requests.get(baseurl + "/users_and_friends").content)
random.seed(0)
# Workload Configurations
workloads = {
"random_read": {"read": 1.0, "write": 0.0},
"read_heavy": {"read": 0.8, "write": 0.2},
"write_heavy": {"read": 0.2, "write": 0.8},
"frequent_users": {"read": 1.0, "write": 0.0, "frequent_percent": 0.7},
#"frequent_after_write": {"read": 0.7, "write": 0.3},
#"friend_based": {"read": 1.0, "write": 0.0, "friend_access": 0.7},
}
# Frequent users for workload testing (top 10 users)
frequent_users = user_ids[:10]
def generate_request(workload, last_updated=None):
"""Generate read or write requests based on workload type"""
if random.random() < workload["read"]:
user_id = select_user(workload, last_updated)
return baseurl + f"/user/{user_id}", None, "GET"
# Write operation (updates user profile)
user_id = select_user(workload, last_updated)
write_obj = { "user_id":user_id,"name": "UpdatedUser", "followers":"500","bio":"Updated","posts":"UpdatedPost"}
url = baseurl + f"/update_user/"
return url, write_obj, "POST"
def select_user(workload, last_updated):
"""Selects a user based on workload type"""
if "frequent_percent" in workload and random.random() < workload["frequent_percent"]:
return random.choice(frequent_users)
if "friend_access" in workload and random.random() < workload["friend_access"]:
return random.choice(user_friends.get(str(last_updated), user_ids))
return random.choice(user_ids)
def run_workload(name, workload):
"""Run a workload and log performance metrics"""
times = []
hits = []
start = time.time()
last_updated = None
for _ in tqdm(range(10000), desc=f"Running {name}"):
url, data, method = generate_request(workload, last_updated)
if( method == "GET" ):
response = requests.request(method, url)
else:
response = requests.post(url, json = data)
try:
content = json.loads(response.content)
if "time_ms" in content:
times.append(content["time_ms"])
hits.append(content["source"] == "cache")
if method == "POST":
last_updated = content.get("user_id", last_updated)
except (json.JSONDecodeError, KeyError):
print(f"Error processing response: {response.content}")
end = time.time()
print(f"\n--- {name} Results ---")
print_report(hits, times, end - start)
# Run all workloads
for workload_name, workload_config in workloads.items():
run_workload(workload_name, workload_config)

View File

@@ -0,0 +1,65 @@
import requests
import random
import json
from tqdm import tqdm
import time
from utils import print_report
baseurl = "http://localhost:8000"
endpoints = {
"/user/{user_id}": 0.8, # 80% read operations
"/update_user/?user_id={user_id}&name=Test&followers=100&bio=Updated&posts=Updated": 0.2 # 20% write operations
}
# Fetch all user IDs
user_ids = json.loads(requests.get(baseurl + "/users").content)["ids"]
random.seed(0)
def generate_random():
"""Randomly generate a read or write request, favoring cache hits."""
endpoint = random.choices(list(endpoints.keys()), list(endpoints.values()))[0]
if endpoint == "/user/{user_id}":
# Favor frequently accessed user IDs to increase hit ratio
if len(user_ids) > 0:
# Sample from a subset of user IDs to simulate frequent access
frequent_users = user_ids[:int(len(user_ids) * 0.2)] # 20% frequent users
random_user = str(random.choice(frequent_users)) if random.random() < 0.7 else str(random.choice(user_ids))
else:
random_user = str(random.choice(user_ids))
else:
random_user = str(random.choice(user_ids))
return baseurl + endpoint.replace("{user_id}", random_user)
times = []
hits = []
# Warm-up phase
# for _ in tqdm(range(1000)):
# url = generate_random()
# requests.get(url)
# Main testing phase
start = time.time()
for i in tqdm(range(10000)):
url = generate_random()
response = requests.get(url)
try:
content = json.loads(response.content)
if "time_ms" in content: # Only process if "time_ms" exists
times.append(content["time_ms"])
hits.append(content["source"] == "cache")
except json.JSONDecodeError:
print(f"Error decoding JSON: {response.content}")
exit(1)
except KeyError:
print(f"Unexpected response format: {content}")
exit(1)
end = time.time()
print_report(hits, times, end - start)

View File

@@ -0,0 +1,47 @@
# Tests latency and hit rate of endpoints. Can be configured with weighted averages for various endpoints.
import requests
import random
import json
from tqdm import tqdm
import time
from utils import print_report
baseurl = "http://localhost:8000"
chance_of_selecting_friend = 0.7
user_friends = json.loads(requests.get(baseurl + "/users_and_friends").content)
user_ids = json.loads(requests.get(baseurl + "/users").content)["ids"]
random.seed(0)
def fetch_friend(prob):
return random.random() < prob
def generate_random():
random_user = str(random.choice(user_ids))
return random_user
def generate_random_friend(user):
next_user = str(random.choice(user_friends[user]))
return next_user
times = []
hits = []
start = time.time()
curr_user = generate_random()
for i in tqdm(range(10000)):
url = baseurl + "/user/" + curr_user
response = requests.get(url)
content = json.loads(response.content)
times.append(content["time_ms"])
hits.append(content["source"] == "cache")
if fetch_friend(chance_of_selecting_friend):
curr_user = generate_random_friend(curr_user)
else:
curr_user = generate_random()
end = time.time()
print_report(hits, times, end - start)