Add a "friends" field to user profiles, seed the database from LLM-generated
data, and pass the cache limit to PrefetchCache.

Review fixes folded into the added lines:
  * PrefetchCache.set_relations stored `related_key | related_val`, which
    raises TypeError for two str arguments; it now stores a tuple.
  * generate_data.py assigned None into os.environ at import time
    (TypeError); the token is now read from the environment instead.
  * parse_profile called random.randint(1, num_users - 1), which raises
    ValueError when there are no other users; that case now yields [].
  * ChatPromptTemplate is built with the from_messages constructor.

The "index" hashes of prefetch_cache.py and generate_data.py predate these
fixes. Leading whitespace of context lines was reconstructed and should be
checked against the target tree before applying.

diff --git a/app/cache/prefetch_cache.py b/app/cache/prefetch_cache.py
index 63a0799..c17e0e1 100644
--- a/app/cache/prefetch_cache.py
+++ b/app/cache/prefetch_cache.py
@@ -24,5 +24,10 @@ class PrefetchCache(BaselineCache):
             return True
         return False
 
-    def set_relations(self):
+    def set_relations(self, key: str, related_key: str, related_val: str):
+        """Map ``key`` to its ``(related_key, related_val)`` pair."""
+        # ``related_key | related_val`` raised TypeError: ``|`` is not defined
+        # between two str objects. NOTE(review): a tuple is assumed here --
+        # confirm against the code that reads ``key_relations``.
+        self.key_relations[key] = (related_key, related_val)
         return
\ No newline at end of file
diff --git a/app/database.py b/app/database.py
index b2935da..fb3f17f 100644
--- a/app/database.py
+++ b/app/database.py
@@ -1,5 +1,6 @@
 
 from tinydb import TinyDB, Query
+from generate_data import generate_data
 
 # Initialize TinyDB as a NoSQL key-value store
 DB_FILE = "database.json"
@@ -11,9 +12,9 @@ def get_user_profile(user_id):
     result = db.search(User.user_id == user_id)
     return result[0] if result else None
 
-def update_user_profile(user_id, name, followers, bio, posts):
+def update_user_profile(user_id, name, followers, bio, posts, friends):
     """Update user profile in TinyDB"""
-    db.upsert({"user_id": user_id, "name": name, "followers": followers, "bio": bio, "posts": posts}, User.user_id == user_id)
+    db.upsert({"user_id": user_id, "name": name, "followers": followers, "bio": bio, "posts": posts, "friends": friends}, User.user_id == user_id)
 
 def init_db():
     """Ensure TinyDB is initialized before FastAPI starts and prepopulate some data"""
@@ -22,8 +23,5 @@ def init_db():
 
     # Prepopulate database with some sample users if empty
     if len(db) == 0:
-        db.insert_multiple([
-            {"user_id": "1", "name": "Alice", "followers": 100, "bio": "Love coding!", "posts": "Hello, world!"},
-            {"user_id": "2", "name": "Bob", "followers": 200, "bio": "Tech enthusiast", "posts": "AI is amazing!"},
-            {"user_id": "3", "name": "Charlie", "followers": 50, "bio": "Blogger", "posts": "Check out my latest post!"}
-        ])
+        data = generate_data(100)
+        db.insert_multiple(data)
diff --git a/app/generate_data.py b/app/generate_data.py
new file mode 100644
index 0000000..14ad586
--- /dev/null
+++ b/app/generate_data.py
@@ -0,0 +1,102 @@
+import os
+import random
+import re
+import warnings
+
+from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
+from langchain_huggingface import HuggingFaceEndpoint
+
+warnings.filterwarnings('ignore')
+
+# Read the token from the environment so no secret lives in the repository.
+# The value may be None; it is never written back into os.environ because
+# assigning None there raises TypeError at import time.
+HUGGINGFACEHUB_API_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
+
+# Matches a "Name | Interest | Recent Activity" triple anywhere in the text.
+PROFILE_PATTERN = re.compile(r"([A-Za-z ]+)\|([A-Za-z &\-!]+)\|([A-Za-z .',!?&\-]+)")
+
+
+def parse_profile(text, user_id, num_users):
+    """Build a user record from one raw model response.
+
+    Args:
+        text: Raw text returned by the model.
+        user_id: Index of the user being generated.
+        num_users: Total number of users; friend IDs come from
+            ``range(num_users)``.
+
+    Returns:
+        A dict with ``user_id``, ``name``, ``followers``, ``bio``, ``posts``
+        and ``friends`` keys, or ``None`` when ``text`` holds no
+        ``Name | Interest | Recent Activity`` triple.
+    """
+    match = PROFILE_PATTERN.search(text)
+    if not match:
+        return None  # Skip invalid responses
+
+    name, bio, posts = match.groups()
+
+    # Mock followers count (randomized for realism)
+    followers = random.randint(10, 5000)
+
+    # Mock friends: a random, non-empty subset of the other user IDs. When
+    # there are no other users the list stays empty, since
+    # random.randint(1, 0) would raise ValueError.
+    friend_ids = [str(fid) for fid in range(num_users) if fid != user_id]
+    if friend_ids:
+        friends = random.sample(friend_ids, random.randint(1, len(friend_ids)))
+    else:
+        friends = []
+
+    return {
+        "user_id": str(user_id),
+        "name": name.strip(),
+        "followers": followers,
+        "bio": bio.strip(),
+        "posts": posts.strip(),
+        "friends": friends,
+    }
+
+
+def generate_data(num_users):
+    """Generate mock user profiles by prompting a hosted LLM once per user.
+
+    Args:
+        num_users: Number of profiles to request.
+
+    Returns:
+        A list of profile dicts as produced by ``parse_profile``. Responses
+        that fail to parse are skipped, so the list may be shorter than
+        ``num_users``.
+    """
+    system_message = """You are a data generator creating user profiles for a social media app.
+    Always provide user profiles in this format: Name | Interest | Recent Activity.
+    Do not include numbers, IDs, or assistant labels. Only return a properly formatted response.
+
+    Example: Alice Wonderland | Exploring the world one frame at a time! | Just captured a stunning sunset."""
+    prompt = ChatPromptTemplate.from_messages([
+        ("system", system_message),
+        ("user", "Generate a user profile for user {user_id}"),
+    ])
+
+    llm = HuggingFaceEndpoint(
+        task='text-generation',
+        model="deepseek-ai/DeepSeek-R1",
+        max_new_tokens=150,
+        do_sample=True,
+        top_k=60,
+        temperature=1.0,
+        top_p=0.9,
+        huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
+    )
+    llm_chain = prompt | llm
+
+    data = []
+    for i in range(num_users):
+        raw_text = llm_chain.invoke({"user_id": i})
+        user_profile = parse_profile(raw_text, i, num_users)
+        if user_profile:
+            data.append(user_profile)
+
+    return data
\ No newline at end of file
diff --git a/app/main.py b/app/main.py
index 2dca0ab..afcc544 100644
--- a/app/main.py
+++ b/app/main.py
@@ -13,7 +13,7 @@ app = FastAPI()
 if CACHE_STRATEGY == "Baseline":
     cache = BaselineCache(limit=CACHE_LIMIT)
 elif CACHE_STRATEGY == "Prefetch":
-    cache = PrefetchCache()
+    cache = PrefetchCache(limit=CACHE_LIMIT)
 elif CACHE_STRATEGY == "Tiered":
     cache = TieredCache(limit=CACHE_LIMIT, l2_limit=L2_CACHE_LIMIT)
 elif CACHE_STRATEGY == "Seive":
@@ -37,8 +37,8 @@ def fetch_user_profile(user_id: str):
     return {"user_id": user_id, "profile": profile, "source": "database", "time_ms": (time.time() - start) * 1000}
 
 @app.post("/update_user/")
-def modify_user_profile(user_id: str, name: str, followers: int, bio: str, posts: str):
+def modify_user_profile(user_id: str, name: str, followers: int, bio: str, posts: str, friends: list[str]):
     """Update user profile and refresh cache"""
-    update_user_profile(user_id, name, followers, bio, posts)
+    update_user_profile(user_id, name, followers, bio, posts, friends)
     cache.invalidate(user_id) # Invalidate old cache
     return {"message": "User profile updated successfully"}