mirror of
https://github.com/ltcptgeneral/cs239-caching.git
synced 2025-10-24 12:59:20 +00:00
Add to db instead of creating new one
This commit is contained in:
59
app/cache/prefetch_cache.py
vendored
59
app/cache/prefetch_cache.py
vendored
@@ -1,11 +1,29 @@
|
||||
from .cache import BaselineCache
|
||||
from .cache import Cache
|
||||
from database import get_user_profile
|
||||
from collections import OrderedDict
|
||||
import math
|
||||
|
||||
class PrefetchCache(BaselineCache):
|
||||
key_relations = None
|
||||
class PrefetchCache(Cache):
|
||||
limit = None
|
||||
cache = None
|
||||
|
||||
def __init__(self):
|
||||
def __init__(self, limit):
|
||||
super()
|
||||
self.key_relations = dict()
|
||||
self.limit = limit
|
||||
self.cache = OrderedDict()
|
||||
|
||||
def __eq__(self, other):
    """Compare the cache's backing mapping against ``other``.

    The comparison is delegated to the mapping stored in ``self.cache``, so
    ``other`` is compared with the stored entries rather than with this
    object itself.
    """
    stored_entries = self.cache
    return stored_entries == other
|
||||
|
||||
def __len__(self):
    """Return how many entries are currently held in the cache."""
    stored_entries = self.cache
    return len(stored_entries)
|
||||
|
||||
def get(self, key: str) -> str:
    """Look up ``key`` and mark it as the most recently used entry.

    Returns the cached value on a hit, or ``None`` when ``key`` is absent.
    """
    store = self.cache
    if key not in store:
        return None
    # A hit moves the entry to the end of the ordered mapping; eviction pops
    # from the front, so recently read keys are evicted last.
    store.move_to_end(key)
    return store[key]
|
||||
|
||||
def put(self, key: str, val: str) -> bool:
|
||||
# LRU evict
|
||||
@@ -14,16 +32,29 @@ class PrefetchCache(BaselineCache):
|
||||
self.cache.popitem(last = False)
|
||||
evict = True
|
||||
self.cache[key] = val
|
||||
self.prefetch(key, val)
|
||||
if self.prefetch(val):
|
||||
evict = True
|
||||
|
||||
return evict
|
||||
|
||||
def prefetch(self, key: str, val: str) -> bool:
    """Insert the entry related to ``key`` when the cache is at capacity.

    ``val`` is accepted but not used. Returns True when a related entry was
    written into the cache, False otherwise.
    """
    cache_is_full = len(self.cache) >= self.limit
    if not (cache_is_full and key in self.key_relations):
        return False
    relation = self.key_relations[key]
    related_key, related_val = relation[0], relation[1]
    # NOTE(review): the related entry is added without evicting anything, so
    # the cache can grow past ``limit`` here - confirm this is intended.
    self.cache[related_key] = related_val
    return True
|
||||
def prefetch(self, profile) -> bool:
    """Warm the cache with the profiles of the first few friends of ``profile``.

    At most ``ceil(limit * 0.1)`` friends are loaded, in the order they appear
    in ``profile["friends"]``. Each one is fetched with ``get_user_profile``
    and stored under the friend's id.

    Args:
        profile: The value that was just cached. Anything that is not a dict,
            or a dict without a "friends" list, has nothing to prefetch.

    Returns:
        True if at least one existing entry was evicted to make room.
    """
    # Values that are not profile dicts, and profiles stored before the
    # "friends" field existed, simply have nothing to prefetch.
    friends = profile.get("friends") if isinstance(profile, dict) else None
    if not friends:
        return False

    budget = math.ceil(self.limit * 0.1)
    evict = False
    for friend_id in friends[:budget]:
        data = get_user_profile(friend_id)
        # NOTE(review): whatever get_user_profile returns is cached as-is;
        # confirm how it reports an unknown id.
        if friend_id in self.cache:
            # Overwriting an existing key needs no free slot, so do not evict
            # an unrelated entry just to refresh this one.
            self.cache[friend_id] = data
            continue
        if len(self.cache) >= self.limit:
            # Full: drop the entry at the front (least recently used).
            self.cache.popitem(last=False)
            evict = True
        self.cache[friend_id] = data
    return evict
|
||||
|
||||
def invalidate(self, key: str) -> bool:
    """Remove ``key`` from the cache.

    Returns True when an entry was deleted, False when ``key`` was not cached.
    """
    # basic delete invalidation, no (p)refetching
    store = self.cache
    if key not in store:
        return False
    del store[key]
    return True
|
||||
|
||||
def set_relations(self, key: str, related_key: str, related_val: str):
    """Record which entry should be prefetched alongside ``key``.

    The relation is stored as a ``(related_key, related_val)`` pair, the
    shape the key-relation ``prefetch`` reads back with ``[0]`` and ``[1]``.
    Combining the two with ``|`` raised TypeError for string arguments.

    Args:
        key: Cache key whose insertion triggers the prefetch.
        related_key: Key of the entry to prefetch.
        related_val: Value to store under ``related_key``.
    """
    self.key_relations[key] = (related_key, related_val)
|
@@ -28,7 +28,7 @@ def init_db():
|
||||
# Prepopulate database with some sample users if empty
|
||||
if len(db) == 0:
|
||||
db.insert_multiple([
|
||||
{"user_id": "1", "name": "Alice", "followers": 100, "bio": "Love coding!", "posts": "Hello, world!"},
|
||||
{"user_id": "2", "name": "Bob", "followers": 200, "bio": "Tech enthusiast", "posts": "AI is amazing!"},
|
||||
{"user_id": "3", "name": "Charlie", "followers": 50, "bio": "Blogger", "posts": "Check out my latest post!"}
|
||||
{"user_id": "1", "name": "Alice", "followers": 100, "bio": "Love coding!", "posts": "Hello, world!", "friends": ["2"]},
|
||||
{"user_id": "2", "name": "Bob", "followers": 200, "bio": "Tech enthusiast", "posts": "AI is amazing!","friends": ["3", "1"]},
|
||||
{"user_id": "3", "name": "Charlie", "followers": 50, "bio": "Blogger", "posts": "Check out my latest post!", "friends": ["1"]}
|
||||
])
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -6,6 +6,10 @@ warnings.filterwarnings('ignore')
|
||||
import re
|
||||
import random
|
||||
import json
|
||||
from tinydb import TinyDB
|
||||
from tinydb.storages import JSONStorage
|
||||
from tinydb.middlewares import CachingMiddleware
|
||||
import math
|
||||
|
||||
# Hugging Face Hub credential. Put a token string here to export it for this
# process, or leave it as None to pick up a token already set in the environment.
HUGGINGFACEHUB_API_TOKEN = None
if HUGGINGFACEHUB_API_TOKEN is None:
    # os.environ only accepts str values; assigning None raises TypeError,
    # so fall back to whatever the environment already provides.
    HUGGINGFACEHUB_API_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
else:
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = HUGGINGFACEHUB_API_TOKEN
|
||||
@@ -21,13 +25,11 @@ def parse_profile(text, user_id, num_users):
|
||||
match = re.search(r"([A-Za-z ]+)\|([A-Za-z &\-!]+)\|([A-Za-z .',!?&\-]+)", text)
|
||||
name, bio, posts = match.groups()
|
||||
|
||||
# Generate mock followers count (randomized for realism)
|
||||
followers = random.randint(10, 5000)
|
||||
|
||||
# Generate mock friends (users with nearby IDs)
|
||||
friend_ids = [str(fid) for fid in range(num_users) if fid != user_id]
|
||||
random.shuffle(friend_ids)
|
||||
friends = friend_ids[:random.randint(1, num_users-1)] # Each user gets 1-5 friends
|
||||
friends = friend_ids[:random.randint(1, min(100, math.ceil(num_users/3)))]
|
||||
|
||||
return {
|
||||
"user_id": str(user_id),
|
||||
@@ -38,7 +40,7 @@ def parse_profile(text, user_id, num_users):
|
||||
"friends": friends
|
||||
}
|
||||
|
||||
def generate_data(num_users):
|
||||
def generate_data(base_id, num_users):
|
||||
system_message = """You are a data generator creating user profiles for a social media app.
|
||||
Always provide user profiles in this format: Name | Interest | Recent Activity.
|
||||
Do not include numbers, IDs, or assistant labels. Only return a properly formatted response.
|
||||
@@ -60,25 +62,30 @@ def generate_data(num_users):
|
||||
huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
|
||||
)
|
||||
llm_chain = prompt | llm
|
||||
data = {}
|
||||
i = 0
|
||||
data = []
|
||||
i = base_id
|
||||
user_id = 0
|
||||
while user_id < num_users:
|
||||
raw_text = llm_chain.invoke({"user_id": i})
|
||||
while not valid_data(raw_text):
|
||||
i = i + 1
|
||||
raw_text = llm_chain.invoke({"user_id": i})
|
||||
user_profile = parse_profile(raw_text, user_id, num_users)
|
||||
user_profile = parse_profile(raw_text, base_id + user_id, num_users)
|
||||
user_id = user_id + 1
|
||||
data[user_id] = user_profile
|
||||
i = i + 1
|
||||
data.append(user_profile)
|
||||
|
||||
return data
|
||||
|
||||
if __name__ == "__main__":
|
||||
data = generate_data(100)
|
||||
base_id = input("Enter base id (check db to find the next consecutive user_id): ")
|
||||
num_users = input("Enter number of users to generate: ")
|
||||
data = generate_data(int(base_id), int(num_users))
|
||||
|
||||
# Create json file
|
||||
json_object = json.dumps( {"_default": data}, indent=4 )
|
||||
with open( "datastore/llmData_sns.json", "w" ) as f:
|
||||
f.write( json_object )
|
||||
file_path = "datastore/llmData_sns.json"
|
||||
global db
|
||||
db = TinyDB(file_path, storage=CachingMiddleware(JSONStorage), indent=4)
|
||||
db.insert_multiple(data)
|
||||
db.close()
|
||||
|
Reference in New Issue
Block a user