mirror of
https://github.com/ltcptgeneral/cs239-caching.git
synced 2025-04-04 05:23:26 +00:00
Add to db instead of creating new one
This commit is contained in:
parent
b5e6f5eb9f
commit
5dc9a99a3a
app
59
app/cache/prefetch_cache.py
vendored
59
app/cache/prefetch_cache.py
vendored
@ -1,11 +1,29 @@
|
|||||||
from .cache import BaselineCache
|
from .cache import Cache
|
||||||
|
from database import get_user_profile
|
||||||
|
from collections import OrderedDict
|
||||||
|
import math
|
||||||
|
|
||||||
class PrefetchCache(BaselineCache):
|
class PrefetchCache(Cache):
|
||||||
key_relations = None
|
limit = None
|
||||||
|
cache = None
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self, limit):
|
||||||
super()
|
super()
|
||||||
self.key_relations = dict()
|
self.limit = limit
|
||||||
|
self.cache = OrderedDict()
|
||||||
|
|
||||||
|
def __eq__(self, other):
|
||||||
|
return self.cache == other
|
||||||
|
|
||||||
|
def __len__(self):
|
||||||
|
return len(self.cache)
|
||||||
|
|
||||||
|
def get(self, key: str) -> str:
|
||||||
|
if key in self.cache:
|
||||||
|
self.cache.move_to_end(key)
|
||||||
|
return self.cache[key]
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
|
||||||
def put(self, key: str, val: str) -> bool:
|
def put(self, key: str, val: str) -> bool:
|
||||||
# LRU evict
|
# LRU evict
|
||||||
@ -14,16 +32,29 @@ class PrefetchCache(BaselineCache):
|
|||||||
self.cache.popitem(last = False)
|
self.cache.popitem(last = False)
|
||||||
evict = True
|
evict = True
|
||||||
self.cache[key] = val
|
self.cache[key] = val
|
||||||
self.prefetch(key, val)
|
if self.prefetch(val):
|
||||||
|
evict = True
|
||||||
|
|
||||||
return evict
|
return evict
|
||||||
|
|
||||||
def prefetch(self, key: str, val: str) -> bool:
|
def prefetch(self, profile) -> bool:
|
||||||
if len(self.cache) >= self.limit and key in self.key_relations:
|
evict = False
|
||||||
self.cache[self.key_relations[key][0]] = self.key_relations[key][1]
|
for i in range(math.ceil(self.limit*0.1)):
|
||||||
return True
|
if i < len(profile["friends"]):
|
||||||
return False
|
data = get_user_profile(profile["friends"][i])
|
||||||
|
if len(self.cache) >= self.limit:
|
||||||
|
self.cache.popitem(last = False)
|
||||||
|
evict = True
|
||||||
|
self.cache[profile["friends"][i]] = data
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
return evict
|
||||||
|
|
||||||
def set_relations(self, key: str, related_key: str, related_val: str):
|
def invalidate(self, key: str) -> bool:
|
||||||
self.key_relations[key] = related_key | related_val
|
# basic delete invalidation, no (p)refetching
|
||||||
return
|
if key in self.cache:
|
||||||
|
del self.cache[key]
|
||||||
|
return True
|
||||||
|
else:
|
||||||
|
return False
|
||||||
|
|
@ -28,7 +28,7 @@ def init_db():
|
|||||||
# Prepopulate database with some sample users if empty
|
# Prepopulate database with some sample users if empty
|
||||||
if len(db) == 0:
|
if len(db) == 0:
|
||||||
db.insert_multiple([
|
db.insert_multiple([
|
||||||
{"user_id": "1", "name": "Alice", "followers": 100, "bio": "Love coding!", "posts": "Hello, world!"},
|
{"user_id": "1", "name": "Alice", "followers": 100, "bio": "Love coding!", "posts": "Hello, world!", "friends": ["2"]},
|
||||||
{"user_id": "2", "name": "Bob", "followers": 200, "bio": "Tech enthusiast", "posts": "AI is amazing!"},
|
{"user_id": "2", "name": "Bob", "followers": 200, "bio": "Tech enthusiast", "posts": "AI is amazing!","friends": ["3", "1"]},
|
||||||
{"user_id": "3", "name": "Charlie", "followers": 50, "bio": "Blogger", "posts": "Check out my latest post!"}
|
{"user_id": "3", "name": "Charlie", "followers": 50, "bio": "Blogger", "posts": "Check out my latest post!", "friends": ["1"]}
|
||||||
])
|
])
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -6,6 +6,10 @@ warnings.filterwarnings('ignore')
|
|||||||
import re
|
import re
|
||||||
import random
|
import random
|
||||||
import json
|
import json
|
||||||
|
from tinydb import TinyDB
|
||||||
|
from tinydb.storages import JSONStorage
|
||||||
|
from tinydb.middlewares import CachingMiddleware
|
||||||
|
import math
|
||||||
|
|
||||||
HUGGINGFACEHUB_API_TOKEN = None
|
HUGGINGFACEHUB_API_TOKEN = None
|
||||||
os.environ["HUGGINGFACEHUB_API_TOKEN"] = HUGGINGFACEHUB_API_TOKEN
|
os.environ["HUGGINGFACEHUB_API_TOKEN"] = HUGGINGFACEHUB_API_TOKEN
|
||||||
@ -21,13 +25,11 @@ def parse_profile(text, user_id, num_users):
|
|||||||
match = re.search(r"([A-Za-z ]+)\|([A-Za-z &\-!]+)\|([A-Za-z .',!?&\-]+)", text)
|
match = re.search(r"([A-Za-z ]+)\|([A-Za-z &\-!]+)\|([A-Za-z .',!?&\-]+)", text)
|
||||||
name, bio, posts = match.groups()
|
name, bio, posts = match.groups()
|
||||||
|
|
||||||
# Generate mock followers count (randomized for realism)
|
|
||||||
followers = random.randint(10, 5000)
|
followers = random.randint(10, 5000)
|
||||||
|
|
||||||
# Generate mock friends (users with nearby IDs)
|
|
||||||
friend_ids = [str(fid) for fid in range(num_users) if fid != user_id]
|
friend_ids = [str(fid) for fid in range(num_users) if fid != user_id]
|
||||||
random.shuffle(friend_ids)
|
random.shuffle(friend_ids)
|
||||||
friends = friend_ids[:random.randint(1, num_users-1)] # Each user gets 1-5 friends
|
friends = friend_ids[:random.randint(1, min(100, math.ceil(num_users/3)))]
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"user_id": str(user_id),
|
"user_id": str(user_id),
|
||||||
@ -38,7 +40,7 @@ def parse_profile(text, user_id, num_users):
|
|||||||
"friends": friends
|
"friends": friends
|
||||||
}
|
}
|
||||||
|
|
||||||
def generate_data(num_users):
|
def generate_data(base_id, num_users):
|
||||||
system_message = """You are a data generator creating user profiles for a social media app.
|
system_message = """You are a data generator creating user profiles for a social media app.
|
||||||
Always provide user profiles in this format: Name | Interest | Recent Activity.
|
Always provide user profiles in this format: Name | Interest | Recent Activity.
|
||||||
Do not include numbers, IDs, or assistant labels. Only return a properly formatted response.
|
Do not include numbers, IDs, or assistant labels. Only return a properly formatted response.
|
||||||
@ -60,25 +62,30 @@ def generate_data(num_users):
|
|||||||
huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
|
huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
|
||||||
)
|
)
|
||||||
llm_chain = prompt | llm
|
llm_chain = prompt | llm
|
||||||
data = {}
|
data = []
|
||||||
i = 0
|
i = base_id
|
||||||
user_id = 0
|
user_id = 0
|
||||||
while user_id < num_users:
|
while user_id < num_users:
|
||||||
raw_text = llm_chain.invoke({"user_id": i})
|
raw_text = llm_chain.invoke({"user_id": i})
|
||||||
while not valid_data(raw_text):
|
while not valid_data(raw_text):
|
||||||
i = i + 1
|
i = i + 1
|
||||||
raw_text = llm_chain.invoke({"user_id": i})
|
raw_text = llm_chain.invoke({"user_id": i})
|
||||||
user_profile = parse_profile(raw_text, user_id, num_users)
|
user_profile = parse_profile(raw_text, base_id + user_id, num_users)
|
||||||
user_id = user_id + 1
|
user_id = user_id + 1
|
||||||
data[user_id] = user_profile
|
i = i + 1
|
||||||
|
data.append(user_profile)
|
||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
data = generate_data(100)
|
base_id = input("Enter base id (check db to find the next consecutive user_id): ")
|
||||||
|
num_users = input("Enter number of users to generate: ")
|
||||||
|
data = generate_data(int(base_id), int(num_users))
|
||||||
|
|
||||||
# Create json file
|
# Create json file
|
||||||
json_object = json.dumps( {"_default": data}, indent=4 )
|
file_path = "datastore/llmData_sns.json"
|
||||||
with open( "datastore/llmData_sns.json", "w" ) as f:
|
global db
|
||||||
f.write( json_object )
|
db = TinyDB(file_path, storage=CachingMiddleware(JSONStorage), indent=4)
|
||||||
|
db.insert_multiple(data)
|
||||||
|
db.close()
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user