mirror of
				https://github.com/ltcptgeneral/cs239-caching.git
				synced 2025-10-24 21:09:20 +00:00 
			
		
		
		
	fixed issue with generate data
This commit is contained in:
		
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							| @@ -10,11 +10,15 @@ import json | ||||
| HUGGINGFACEHUB_API_TOKEN = None | ||||
| os.environ["HUGGINGFACEHUB_API_TOKEN"] = HUGGINGFACEHUB_API_TOKEN | ||||
|  | ||||
| def parse_profile(text, user_id, num_users): | ||||
| def valid_data(text): | ||||
|     match = re.search(r"([A-Za-z ]+)\|([A-Za-z &\-!]+)\|([A-Za-z .',!?&\-]+)", text) | ||||
|     if not match: | ||||
|         return None  # Skip invalid responses | ||||
|         return False | ||||
|     else: | ||||
|         return True | ||||
|  | ||||
| def parse_profile(text, user_id, num_users): | ||||
|     match = re.search(r"([A-Za-z ]+)\|([A-Za-z &\-!]+)\|([A-Za-z .',!?&\-]+)", text) | ||||
|     name, bio, posts = match.groups() | ||||
|      | ||||
|     # Generate mock followers count (randomized for realism) | ||||
| @@ -40,7 +44,6 @@ def generate_data(num_users): | ||||
|     Do not include numbers, IDs, or assistant labels. Only return a properly formatted response. | ||||
|  | ||||
|     Example: Alice Wonderland | Exploring the world one frame at a time! | Just captured a stunning sunset.""" | ||||
|     # prompt = PromptTemplate.from_template(template) | ||||
|     prompt = ChatPromptTemplate ([ | ||||
|         ("system", system_message), | ||||
|         ("user", "Generate a user profile for user {user_id}") | ||||
| @@ -58,11 +61,16 @@ def generate_data(num_users): | ||||
|     ) | ||||
|     llm_chain = prompt | llm | ||||
|     data = {} | ||||
|     for i in range(num_users):  | ||||
|     i = 0 | ||||
|     user_id = 0 | ||||
|     while user_id < num_users:  | ||||
|         raw_text = llm_chain.invoke({"user_id": i}) | ||||
|         user_profile = parse_profile(raw_text, i, num_users) | ||||
|         if user_profile: | ||||
|             data[i] = user_profile | ||||
|         while not valid_data(raw_text): | ||||
|             i = i + 1 | ||||
|             raw_text = llm_chain.invoke({"user_id": i}) | ||||
|         user_profile = parse_profile(raw_text, user_id, num_users) | ||||
|         user_id = user_id + 1 | ||||
|         data[user_id] = user_profile | ||||
|              | ||||
|     return data | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user