mirror of
https://github.com/ltcptgeneral/cs239-caching.git
synced 2025-04-10 07:37:22 +00:00
Deepseek data generation
This commit is contained in:
parent
3707a2aae9
commit
5c25a2b099
3
app/cache/prefetch_cache.py
vendored
3
app/cache/prefetch_cache.py
vendored
@ -24,5 +24,6 @@ class PrefetchCache(BaselineCache):
|
|||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def set_relations(self, key: str, related_key: str, related_val: str):
    """Record the entry related to *key*.

    The related key and its value are stored together as a
    ``(related_key, related_val)`` tuple in ``self.key_relations[key]``.

    The previous expression ``related_key | related_val`` applied the ``|``
    operator to two ``str`` objects, which raises ``TypeError`` on every call.

    NOTE(review): a tuple is assumed to be the intended stored shape --
    confirm against the code that reads ``key_relations``.

    Args:
        key: Cache key whose relation is being recorded.
        related_key: Key of the related entry.
        related_val: Value of the related entry.
    """
    self.key_relations[key] = (related_key, related_val)
|
@ -1,5 +1,6 @@
|
|||||||
|
|
||||||
from tinydb import TinyDB, Query
|
from tinydb import TinyDB, Query
|
||||||
|
from generate_data import generate_data
|
||||||
|
|
||||||
# Initialize TinyDB as a NoSQL key-value store
|
# Initialize TinyDB as a NoSQL key-value store
|
||||||
DB_FILE = "database.json"
|
DB_FILE = "database.json"
|
||||||
@ -11,9 +12,9 @@ def get_user_profile(user_id):
|
|||||||
result = db.search(User.user_id == user_id)
|
result = db.search(User.user_id == user_id)
|
||||||
return result[0] if result else None
|
return result[0] if result else None
|
||||||
|
|
||||||
def update_user_profile(user_id, name, followers, bio, posts, friends):
    """Insert or update the TinyDB record whose ``user_id`` matches.

    The full record (including the ``friends`` list) is passed to
    ``db.upsert`` together with a ``User.user_id == user_id`` condition.
    """
    record = {
        "user_id": user_id,
        "name": name,
        "followers": followers,
        "bio": bio,
        "posts": posts,
        "friends": friends,
    }
    same_user = User.user_id == user_id
    db.upsert(record, same_user)
|
||||||
|
|
||||||
def init_db():
|
def init_db():
|
||||||
"""Ensure TinyDB is initialized before FastAPI starts and prepopulate some data"""
|
"""Ensure TinyDB is initialized before FastAPI starts and prepopulate some data"""
|
||||||
@ -22,8 +23,5 @@ def init_db():
|
|||||||
|
|
||||||
# Prepopulate database with some sample users if empty
|
# Prepopulate database with some sample users if empty
|
||||||
if len(db) == 0:
|
if len(db) == 0:
|
||||||
db.insert_multiple([
|
data = generate_data(100)
|
||||||
{"user_id": "1", "name": "Alice", "followers": 100, "bio": "Love coding!", "posts": "Hello, world!"},
|
db.insert_multiple(data)
|
||||||
{"user_id": "2", "name": "Bob", "followers": 200, "bio": "Tech enthusiast", "posts": "AI is amazing!"},
|
|
||||||
{"user_id": "3", "name": "Charlie", "followers": 50, "bio": "Blogger", "posts": "Check out my latest post!"}
|
|
||||||
])
|
|
||||||
|
66
app/generate_data.py
Normal file
66
app/generate_data.py
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
import os
|
||||||
|
from langchain_huggingface import HuggingFaceEndpoint
|
||||||
|
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
|
||||||
|
import warnings
|
||||||
|
warnings.filterwarnings('ignore')
|
||||||
|
import re
|
||||||
|
import random
|
||||||
|
|
||||||
|
# Read the Hugging Face token from the process environment rather than
# hard-coding it. The previous code assigned None into os.environ, which only
# accepts str values and therefore raised TypeError as soon as this module was
# imported. The constant is None when the variable is not set.
HUGGINGFACEHUB_API_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
|
||||||
|
|
||||||
|
def parse_profile(text, user_id, num_users):
    """Parse one model response into a user-profile dict.

    The response is expected to contain ``Name | Interest | Recent Activity``;
    the first such match found anywhere in *text* is used.

    Args:
        text: Raw text returned by the model.
        user_id: Identifier of the user being generated; stored as a string.
        num_users: Total number of users. Friend IDs are drawn from
            ``range(num_users)``, excluding *user_id* itself.

    Returns:
        A dict with keys ``user_id``, ``name``, ``followers``, ``bio``,
        ``posts`` and ``friends``, or ``None`` when *text* does not match the
        expected format.
    """
    match = re.search(r"([A-Za-z ]+)\|([A-Za-z &\-!]+)\|([A-Za-z .',!?&\-]+)", text)
    if not match:
        return None  # Skip invalid responses

    name, bio, posts = (part.strip() for part in match.groups())

    # Generate mock followers count (randomized for realism)
    followers = random.randint(10, 5000)

    # Generate mock friends: a random, non-empty subset of all other user IDs.
    # IDs are compared as strings so a str user_id is excluded as well.
    own_id = str(user_id)
    candidates = [str(fid) for fid in range(num_users) if str(fid) != own_id]
    if candidates:
        friends = random.sample(candidates, random.randint(1, len(candidates)))
    else:
        # With no other users, randint(1, 0) used to raise ValueError.
        friends = []

    return {
        "user_id": own_id,
        "name": name,
        "followers": followers,
        "bio": bio,
        "posts": posts,
        "friends": friends,
    }
|
||||||
|
|
||||||
|
def generate_data(num_users):
    """Generate mock user profiles by prompting a hosted language model.

    One request is issued per user ID in ``range(num_users)``. Each response
    is handed to ``parse_profile``; responses that fail to parse are dropped,
    so the returned list may contain fewer than *num_users* entries.

    Args:
        num_users: Number of user IDs to request profiles for.

    Returns:
        A list of the profile dicts produced by ``parse_profile``.
    """
    # The continuation lines of the prompt are kept flush-left so that no
    # source indentation ends up inside the prompt text.
    system_message = """You are a data generator creating user profiles for a social media app.
Always provide user profiles in this format: Name | Interest | Recent Activity.
Do not include numbers, IDs, or assistant labels. Only return a properly formatted response.

Example: Alice Wonderland | Exploring the world one frame at a time! | Just captured a stunning sunset."""
    chat_prompt = ChatPromptTemplate([
        ("system", system_message),
        ("user", "Generate a user profile for user {user_id}"),
    ])

    endpoint = HuggingFaceEndpoint(
        task='text-generation',
        model="deepseek-ai/DeepSeek-R1",
        max_new_tokens=150,
        do_sample=True,
        top_k=60,
        temperature=1.0,
        top_p=0.9,
        huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
    )
    pipeline = chat_prompt | endpoint

    # Lazily request and parse one profile per user ID, in ID order.
    parsed = (
        parse_profile(pipeline.invoke({"user_id": uid}), uid, num_users)
        for uid in range(num_users)
    )
    return [profile for profile in parsed if profile]
|
@ -13,7 +13,7 @@ app = FastAPI()
|
|||||||
if CACHE_STRATEGY == "Baseline":
|
if CACHE_STRATEGY == "Baseline":
|
||||||
cache = BaselineCache(limit=CACHE_LIMIT)
|
cache = BaselineCache(limit=CACHE_LIMIT)
|
||||||
elif CACHE_STRATEGY == "Prefetch":
|
elif CACHE_STRATEGY == "Prefetch":
|
||||||
cache = PrefetchCache()
|
cache = PrefetchCache(limit=CACHE_LIMIT)
|
||||||
elif CACHE_STRATEGY == "Tiered":
|
elif CACHE_STRATEGY == "Tiered":
|
||||||
cache = TieredCache(limit=CACHE_LIMIT, l2_limit=L2_CACHE_LIMIT)
|
cache = TieredCache(limit=CACHE_LIMIT, l2_limit=L2_CACHE_LIMIT)
|
||||||
elif CACHE_STRATEGY == "Seive":
|
elif CACHE_STRATEGY == "Seive":
|
||||||
@ -37,8 +37,8 @@ def fetch_user_profile(user_id: str):
|
|||||||
return {"user_id": user_id, "profile": profile, "source": "database", "time_ms": (time.time() - start) * 1000}
|
return {"user_id": user_id, "profile": profile, "source": "database", "time_ms": (time.time() - start) * 1000}
|
||||||
|
|
||||||
@app.post("/update_user/")
def modify_user_profile(user_id: str, name: str, followers: int, bio: str, posts: str, friends: list[str]):
    """Persist the updated profile, then drop its stale cache entry.

    NOTE(review): FastAPI interprets a bare ``list[str]`` parameter as a
    request body rather than a query parameter -- confirm that is the
    intended way for clients to send ``friends``.
    """
    update_user_profile(
        user_id=user_id,
        name=name,
        followers=followers,
        bio=bio,
        posts=posts,
        friends=friends,
    )
    # The cached copy no longer matches the database, so remove it.
    cache.invalidate(user_id)
    response = {"message": "User profile updated successfully"}
    return response
|
||||||
|
Loading…
x
Reference in New Issue
Block a user