mirror of
				https://github.com/ltcptgeneral/cs239-caching.git
				synced 2025-10-25 05:19:20 +00:00 
			
		
		
		
	Deepseek data generation
This commit is contained in:
		
							
								
								
									
										3
									
								
								app/cache/prefetch_cache.py
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										3
									
								
								app/cache/prefetch_cache.py
									
									
									
									
										vendored
									
									
								
							| @@ -24,5 +24,6 @@ class PrefetchCache(BaselineCache): | |||||||
|             return True |             return True | ||||||
|         return False |         return False | ||||||
|  |  | ||||||
|     def set_relations(self): |     def set_relations(self, key: str, related_key: str, related_val: str): | ||||||
|  |         self.key_relations[key] = related_key | related_val | ||||||
|         return |         return | ||||||
| @@ -1,5 +1,6 @@ | |||||||
|  |  | ||||||
| from tinydb import TinyDB, Query | from tinydb import TinyDB, Query | ||||||
|  | from generate_data import generate_data | ||||||
|  |  | ||||||
| # Initialize TinyDB as a NoSQL key-value store | # Initialize TinyDB as a NoSQL key-value store | ||||||
| DB_FILE = "database.json" | DB_FILE = "database.json" | ||||||
| @@ -11,9 +12,9 @@ def get_user_profile(user_id): | |||||||
|     result = db.search(User.user_id == user_id) |     result = db.search(User.user_id == user_id) | ||||||
|     return result[0] if result else None |     return result[0] if result else None | ||||||
|  |  | ||||||
| def update_user_profile(user_id, name, followers, bio, posts): | def update_user_profile(user_id, name, followers, bio, posts, friends): | ||||||
|     """Update user profile in TinyDB""" |     """Update user profile in TinyDB""" | ||||||
|     db.upsert({"user_id": user_id, "name": name, "followers": followers, "bio": bio, "posts": posts}, User.user_id == user_id) |     db.upsert({"user_id": user_id, "name": name, "followers": followers, "bio": bio, "posts": posts, "friends": friends}, User.user_id == user_id) | ||||||
|  |  | ||||||
| def init_db(): | def init_db(): | ||||||
|     """Ensure TinyDB is initialized before FastAPI starts and prepopulate some data""" |     """Ensure TinyDB is initialized before FastAPI starts and prepopulate some data""" | ||||||
| @@ -22,8 +23,5 @@ def init_db(): | |||||||
|  |  | ||||||
|     # Prepopulate database with some sample users if empty |     # Prepopulate database with some sample users if empty | ||||||
|     if len(db) == 0: |     if len(db) == 0: | ||||||
|         db.insert_multiple([ |         data = generate_data(100) | ||||||
|             {"user_id": "1", "name": "Alice", "followers": 100, "bio": "Love coding!", "posts": "Hello, world!"}, |         db.insert_multiple(data) | ||||||
|             {"user_id": "2", "name": "Bob", "followers": 200, "bio": "Tech enthusiast", "posts": "AI is amazing!"}, |  | ||||||
|             {"user_id": "3", "name": "Charlie", "followers": 50, "bio": "Blogger", "posts": "Check out my latest post!"} |  | ||||||
|         ]) |  | ||||||
|   | |||||||
							
								
								
									
										66
									
								
								app/generate_data.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										66
									
								
								app/generate_data.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,66 @@ | |||||||
|  | import os | ||||||
|  | from langchain_huggingface import HuggingFaceEndpoint | ||||||
|  | from langchain_core.prompts import PromptTemplate, ChatPromptTemplate | ||||||
|  | import warnings | ||||||
|  | warnings.filterwarnings('ignore') | ||||||
|  | import re | ||||||
|  | import random | ||||||
|  |  | ||||||
# Hugging Face API token, taken from the environment so no secret is committed.
# It stays None when the variable is not exported; assigning None into
# os.environ would raise TypeError (environment values must be strings), so the
# environment is only read here, never written.
HUGGINGFACEHUB_API_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
|  |  | ||||||
def parse_profile(text, user_id, num_users):
    """Turn one raw model response into a user-profile dict.

    Args:
        text: Raw model output, expected to contain a
            ``Name | Interest | Recent Activity`` triple.
        user_id: ID of the user being generated; stored as a string.
        num_users: Total number of users; friend IDs are drawn from
            ``range(num_users)``.

    Returns:
        A dict with the keys ``user_id``, ``name``, ``followers``, ``bio``,
        ``posts`` and ``friends``, or ``None`` when ``text`` does not contain
        a pipe-separated triple matching the expected character sets.
    """
    match = re.search(r"([A-Za-z ]+)\|([A-Za-z &\-!]+)\|([A-Za-z .',!?&\-]+)", text)
    if not match:
        return None  # Skip invalid responses

    name, bio, posts = match.groups()

    # Mock follower count, uniformly random in [10, 5000].
    followers = random.randint(10, 5000)

    # Mock friends: a random, non-empty subset of every other user's ID.
    # IDs are compared as strings so the user is excluded from their own
    # friend list whether user_id arrives as an int or a str.
    own_id = str(user_id)
    friend_ids = [fid for fid in map(str, range(num_users)) if fid != own_id]
    random.shuffle(friend_ids)
    if num_users > 1:
        friends = friend_ids[:random.randint(1, num_users - 1)]
    else:
        # With a single user there is nobody to befriend; randint(1, 0)
        # would raise ValueError here.
        friends = []

    return {
        "user_id": own_id,
        "name": name.strip(),
        "followers": followers,
        "bio": bio.strip(),
        "posts": posts.strip(),
        "friends": friends,
    }
|  |  | ||||||
def generate_data(num_users):
    """Generate user profiles by prompting a hosted language model.

    One request is sent per user ID in ``range(num_users)``; each response is
    handed to ``parse_profile``. Responses that fail to parse are dropped, so
    the returned list may contain fewer than ``num_users`` entries.

    Args:
        num_users: Number of user IDs to request profiles for.

    Returns:
        A list of the profile dicts produced by ``parse_profile``, in ID order.
    """
    system_message = """You are a data generator creating user profiles for a social media app. 
    Always provide user profiles in this format: Name | Interest | Recent Activity.
    Do not include numbers, IDs, or assistant labels. Only return a properly formatted response.

    Example: Alice Wonderland | Exploring the world one frame at a time! | Just captured a stunning sunset."""
    chat_prompt = ChatPromptTemplate([
        ("system", system_message),
        ("user", "Generate a user profile for user {user_id}"),
    ])

    endpoint = HuggingFaceEndpoint(
        task='text-generation',
        model="deepseek-ai/DeepSeek-R1",
        max_new_tokens=150,
        do_sample=True,
        top_k=60,
        temperature=1.0,
        top_p=0.9,
        huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
    )
    chain = chat_prompt | endpoint

    # Lazily request and parse one profile per ID, in order; falsy results
    # (unparseable responses) are filtered out below.
    candidates = (
        parse_profile(chain.invoke({"user_id": uid}), uid, num_users)
        for uid in range(num_users)
    )
    return [profile for profile in candidates if profile]
| @@ -13,7 +13,7 @@ app = FastAPI() | |||||||
| if CACHE_STRATEGY == "Baseline": | if CACHE_STRATEGY == "Baseline": | ||||||
|     cache = BaselineCache(limit=CACHE_LIMIT) |     cache = BaselineCache(limit=CACHE_LIMIT) | ||||||
| elif CACHE_STRATEGY == "Prefetch": | elif CACHE_STRATEGY == "Prefetch": | ||||||
|     cache = PrefetchCache() |     cache = PrefetchCache(limit=CACHE_LIMIT) | ||||||
| elif CACHE_STRATEGY == "Tiered": | elif CACHE_STRATEGY == "Tiered": | ||||||
|     cache = TieredCache(limit=CACHE_LIMIT, l2_limit=L2_CACHE_LIMIT) |     cache = TieredCache(limit=CACHE_LIMIT, l2_limit=L2_CACHE_LIMIT) | ||||||
| elif CACHE_STRATEGY == "Seive": | elif CACHE_STRATEGY == "Seive": | ||||||
| @@ -37,8 +37,8 @@ def fetch_user_profile(user_id: str): | |||||||
|     return {"user_id": user_id, "profile": profile, "source": "database", "time_ms": (time.time() - start) * 1000} |     return {"user_id": user_id, "profile": profile, "source": "database", "time_ms": (time.time() - start) * 1000} | ||||||
|  |  | ||||||
| @app.post("/update_user/") | @app.post("/update_user/") | ||||||
| def modify_user_profile(user_id: str, name: str, followers: int, bio: str, posts: str): | def modify_user_profile(user_id: str, name: str, followers: int, bio: str, posts: str, friends: list[str]): | ||||||
|     """Update user profile and refresh cache""" |     """Update user profile and refresh cache""" | ||||||
|     update_user_profile(user_id, name, followers, bio, posts) |     update_user_profile(user_id, name, followers, bio, posts, friends) | ||||||
|     cache.invalidate(user_id)  # Invalidate old cache |     cache.invalidate(user_id)  # Invalidate old cache | ||||||
|     return {"message": "User profile updated successfully"} |     return {"message": "User profile updated successfully"} | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user