From e9b112882616f48057cf388648ed536a57e8d4b4 Mon Sep 17 00:00:00 2001
From: HiccupHan <89772977+HiccupHan@users.noreply.github.com>
Date: Tue, 25 Feb 2025 13:02:48 -0800
Subject: [PATCH] prefetch test

---
 app/cache/prefetch_cache.py     | 17 ++++-----
 app/config.yaml                 |  2 +-
 app/database.py                 | 17 +++++++++
 app/main.py                     |  6 +++-
 tests/random_friend_readonly.py | 63 +++++++++++++++++++++++++++++++++
 5 files changed, 93 insertions(+), 12 deletions(-)
 create mode 100644 tests/random_friend_readonly.py

diff --git a/app/cache/prefetch_cache.py b/app/cache/prefetch_cache.py
index 8050b87..45272cd 100644
--- a/app/cache/prefetch_cache.py
+++ b/app/cache/prefetch_cache.py
@@ -1,5 +1,5 @@
 from .cache import Cache
-from database import get_user_profile
+from database import get_user_profile, get_friends
 from collections import OrderedDict
 import math
 
@@ -39,15 +39,12 @@ class PrefetchCache(Cache):
 
     def prefetch(self, profile) -> bool:
         evict = False
-        for i in range(math.ceil(self.limit*0.1)):
-            if i < len(profile["friends"]):
-                data = get_user_profile(profile["friends"][i])
-                if len(self.cache) >= self.limit:
-                    self.cache.popitem(last = False)
-                    evict = True
-                self.cache[profile["friends"][i]] = data
-            else:
-                break
+        friends_prof = get_friends(profile["user_id"], math.ceil(self.limit*0.1))
+        for i in friends_prof:
+            if len(self.cache) >= self.limit:
+                self.cache.popitem(last = False)
+                evict = True
+            self.cache[i] = friends_prof[i]
         return evict
 
     def invalidate(self, key: str) -> bool:
diff --git a/app/config.yaml b/app/config.yaml
index 99494e9..162518f 100644
--- a/app/config.yaml
+++ b/app/config.yaml
@@ -1,4 +1,4 @@
-cache_strategy: "Tiered" # Change this to "Prefetch" or "Tiered" or "Seive"
+cache_strategy: "Prefetch" # Change this to "Prefetch" or "Tiered" or "Seive"
 cache_limit: 10
 l2_cache_limit: 100
 db_file: "llmData_sns.json" # Change this to the name of any json file within the "database/datastore" folder
\ No newline at end of file
diff --git a/app/database.py b/app/database.py
index 75836b5..aa55595 100644
--- a/app/database.py
+++ b/app/database.py
@@ -1,6 +1,7 @@
 from tinydb import TinyDB, Query
 
 from config import DB_FILE
+import random
 
 DB_LOCATION = "database/datastore/" + DB_FILE
 
@@ -11,6 +12,22 @@ User = Query()
 def get_user_ids():
     return [x["user_id"] for x in db.all()]
 
+def get_user_friend_ids():
+    user_friends = {}
+    for x in db.all():
+        user_friends[x["user_id"]] = x["friends"]
+    return user_friends
+
+def get_friends(user_id, num_friends):
+    friends = {}
+    curr_user = db.search(User.user_id == user_id)
+    random.seed(0)
+    if not curr_user:
+        return {}
+    for f in random.sample(curr_user[0]["friends"], min(num_friends, len(curr_user[0]["friends"]))):
+        friends[f] = db.search(User.user_id == f)[0]
+    return friends
+
 def get_user_profile(user_id):
     """Fetch user profile from TinyDB"""
     result = db.search(User.user_id == user_id)
diff --git a/app/main.py b/app/main.py
index 6e8db33..fcfca9f 100644
--- a/app/main.py
+++ b/app/main.py
@@ -1,5 +1,5 @@
 from fastapi import FastAPI, HTTPException
-from database import get_user_ids, get_user_profile, update_user_profile
+from database import get_user_ids, get_user_profile, update_user_profile, get_user_friend_ids
 from cache.cache import BaselineCache
 from cache.prefetch_cache import PrefetchCache
 from cache.tiered_cache import TieredCache
@@ -25,6 +25,10 @@ else:
 def fetch_user_ids():
     return {"ids": get_user_ids()}
 
+@app.get("/users_and_friends")
+def fetch_user_and_friends():
+    return get_user_friend_ids()
+
 @app.get("/user/{user_id}")
 def fetch_user_profile(user_id: str):
"""Fetch user profile with caching""" diff --git a/tests/random_friend_readonly.py b/tests/random_friend_readonly.py new file mode 100644 index 0000000..0ac48ad --- /dev/null +++ b/tests/random_friend_readonly.py @@ -0,0 +1,63 @@ +# Tests latency and hit rate of endpoints. Can be configured with weighted averages for various endpoints. + +import requests +import random +import json +from tqdm import tqdm +import time + +baseurl = "http://localhost:8000" + +chance_of_selecting_friend = 0.7 + +user_friends = json.loads(requests.get(baseurl + "/users_and_friends").content) +user_ids = json.loads(requests.get(baseurl + "/users").content)["ids"] + +random.seed(0) + +def fetch_friend(prob): + return random.random() < prob + +def generate_random(): + random_user = str(random.choice(user_ids)) + return random_user + +def generate_random_friend(user): + next_user = str(random.choice(user_friends[user])) + return next_user + +times = [] +hits = [] + +start = time.time() +curr_user = generate_random() +for i in tqdm(range(10000)): + url = baseurl + "/user/" + curr_user + response = requests.get(url) + content = json.loads(response.content) + times.append(content["time_ms"]) + hits.append(content["source"] == "cache") + if fetch_friend(chance_of_selecting_friend): + curr_user = generate_random_friend(curr_user) + else: + curr_user = generate_random() +end = time.time() + +hits_count = sum(hits) +miss_count = len(hits) - hits_count + +hits_time = 0 +miss_time = 0 +for i in range(len(times)): + if hits[i]: + hits_time += times[i] + else: + miss_time += times[i] +total_time = hits_time + miss_time + +print(f"hits: {hits_count} misses: {miss_count} ratio: { hits_count / (hits_count + miss_count)}") +print(f"average response time (ms) : {total_time / len(times)}") +print(f"average cache hit response time (ms) : {hits_time / hits_count}") +print(f"average cache miss response time (ms): {miss_time / miss_count}") +print(f"cache throughput (requests / ms) : { len(times) / total_time}") +print(f"real throughput (requests / ms) : { len(times) / (end - start) / 1000}") \ No newline at end of file