mirror of
https://github.com/ltcptgeneral/cs239-caching.git
synced 2025-04-01 12:33:25 +00:00
Abstracted data loading so that data can be loaded simply through the config file. Moved Mike's LLM data generation code and my own dummy data generating code to the database folder.
This commit is contained in:
parent
5c25a2b099
commit
dd5fc9f83c
@ -13,3 +13,4 @@ config = load_config()
|
||||
CACHE_STRATEGY = os.getenv("CACHE_STRATEGY", config.get("cache_strategy", "Baseline"))
|
||||
CACHE_LIMIT = config.get("cache_limit", 10)
|
||||
L2_CACHE_LIMIT = config.get("l2_cache_limit", 100)
|
||||
DB_FILE = config.get("db_file", "llmData_sns.json")
|
||||
|
@ -1,3 +1,4 @@
|
||||
cache_strategy: "Baseline" # Change this to "Prefetch" or "Tiered" or "Seive"
|
||||
cache_limit: 10
|
||||
l2_cache_limit: 100
|
||||
db_file: "llmData_sns.json" # Change this to the name of any json file within the "database/datastore" folder
|
@ -1,10 +1,11 @@
|
||||
|
||||
from tinydb import TinyDB, Query
|
||||
from generate_data import generate_data
|
||||
from config import DB_FILE
|
||||
|
||||
DB_LOCATION = "database/datastore/" + DB_FILE
|
||||
|
||||
# Initialize TinyDB as a NoSQL key-value store
|
||||
DB_FILE = "database.json"
|
||||
db = TinyDB(DB_FILE)
|
||||
db = TinyDB(DB_LOCATION)
|
||||
User = Query()
|
||||
|
||||
def get_user_profile(user_id):
|
||||
@ -19,9 +20,12 @@ def update_user_profile(user_id, name, followers, bio, posts, friends):
|
||||
def init_db():
|
||||
"""Ensure TinyDB is initialized before FastAPI starts and prepopulate some data"""
|
||||
global db
|
||||
db = TinyDB(DB_FILE) # Reload TinyDB if needed
|
||||
db = TinyDB(DB_LOCATION) # Reload TinyDB if needed
|
||||
|
||||
# Prepopulate database with some sample users if empty
|
||||
if len(db) == 0:
|
||||
data = generate_data(100)
|
||||
db.insert_multiple(data)
|
||||
db.insert_multiple([
|
||||
{"user_id": "1", "name": "Alice", "followers": 100, "bio": "Love coding!", "posts": "Hello, world!"},
|
||||
{"user_id": "2", "name": "Bob", "followers": 200, "bio": "Tech enthusiast", "posts": "AI is amazing!"},
|
||||
{"user_id": "3", "name": "Charlie", "followers": 50, "bio": "Blogger", "posts": "Check out my latest post!"}
|
||||
])
|
||||
|
44
app/database/create_basic_dummy_data.py
Normal file
44
app/database/create_basic_dummy_data.py
Normal file
@ -0,0 +1,44 @@
|
||||
# Quick script to create basic dummy data for the caching system
# Current theme/usage is in a social media microservice setting such as Twitter
# Current database schema:
### user_id (primary key), str
### username, str
### num_followers, int
### posts, str (for now, can be list later on)
### friends, list[int] (ids of other users; stored as ints, unlike user_id)

import json
import random

# Parameters to change the distribution/random ranges
TOTAL_USERS = 20
MIN_FOLLOWERS, MAX_FOLLOWERS = 0, 5
# Reserved for when "posts" becomes a list of strings; currently unused.
MIN_POSTS, MAX_POSTS = 5, 10
MIN_FRIENDS, MAX_FRIENDS = 1, 5

# Written relative to the current working directory, so run this script from
# the folder that contains "datastore/".
OUTPUT_PATH = "datastore/basicDummy_sns.json"


def generate_users(total_users=TOTAL_USERS):
    """Build the dummy user records.

    Args:
        total_users: Number of users to create; ids run from 0 to
            total_users - 1.

    Returns:
        A dict mapping each integer user id to that user's record (a dict
        with the keys "user_id", "username", "num_followers", "posts" and
        "friends").
    """
    # TODO: to vary the user data more, inflate "posts" into a list of
    # (possibly very long) strings, sized with MIN_POSTS / MAX_POSTS.
    users = {}
    for user_id in range(total_users):
        num_followers = random.randint(MIN_FOLLOWERS, MAX_FOLLOWERS)

        # A user can never be their own friend.
        candidates = [u_id for u_id in range(total_users) if u_id != user_id]
        # The friend count comes from the FRIENDS range (the followers range
        # was used here before, which left MIN_FRIENDS/MAX_FRIENDS dead and
        # allowed zero friends). Clamp so random.sample never asks for more
        # users than exist.
        num_friends = min(
            random.randint(MIN_FRIENDS, MAX_FRIENDS), len(candidates)
        )

        users[user_id] = {
            "user_id": str(user_id),
            "username": "user" + str(user_id),
            "num_followers": num_followers,
            # Just a single post for now; multiple posts can come later.
            "posts": f"This is user {user_id}'s post!",
            "friends": random.sample(candidates, num_friends),
        }
    return users


def main():
    """Generate the users and write them into the datastore folder."""
    # The records are nested under a top-level "_default" key; json.dumps
    # turns the integer user ids into string keys.
    json_object = json.dumps({"_default": generate_users()}, indent=4)
    with open(OUTPUT_PATH, "w") as f:
        f.write(json_object)


if __name__ == "__main__":
    main()
|
214
app/database/datastore/basicDummy_sns.json
Normal file
214
app/database/datastore/basicDummy_sns.json
Normal file
@ -0,0 +1,214 @@
|
||||
{
|
||||
"_default": {
|
||||
"0": {
|
||||
"user_id": "0",
|
||||
"username": "user0",
|
||||
"num_followers": 0,
|
||||
"posts": "This is user 0's post!",
|
||||
"friends": [
|
||||
17,
|
||||
13,
|
||||
14
|
||||
]
|
||||
},
|
||||
"1": {
|
||||
"user_id": "1",
|
||||
"username": "user1",
|
||||
"num_followers": 1,
|
||||
"posts": "This is user 1's post!",
|
||||
"friends": [
|
||||
8,
|
||||
17,
|
||||
15,
|
||||
3
|
||||
]
|
||||
},
|
||||
"2": {
|
||||
"user_id": "2",
|
||||
"username": "user2",
|
||||
"num_followers": 4,
|
||||
"posts": "This is user 2's post!",
|
||||
"friends": [
|
||||
9,
|
||||
12
|
||||
]
|
||||
},
|
||||
"3": {
|
||||
"user_id": "3",
|
||||
"username": "user3",
|
||||
"num_followers": 1,
|
||||
"posts": "This is user 3's post!",
|
||||
"friends": [
|
||||
10,
|
||||
7,
|
||||
8,
|
||||
14
|
||||
]
|
||||
},
|
||||
"4": {
|
||||
"user_id": "4",
|
||||
"username": "user4",
|
||||
"num_followers": 0,
|
||||
"posts": "This is user 4's post!",
|
||||
"friends": [
|
||||
13,
|
||||
0
|
||||
]
|
||||
},
|
||||
"5": {
|
||||
"user_id": "5",
|
||||
"username": "user5",
|
||||
"num_followers": 4,
|
||||
"posts": "This is user 5's post!",
|
||||
"friends": []
|
||||
},
|
||||
"6": {
|
||||
"user_id": "6",
|
||||
"username": "user6",
|
||||
"num_followers": 3,
|
||||
"posts": "This is user 6's post!",
|
||||
"friends": []
|
||||
},
|
||||
"7": {
|
||||
"user_id": "7",
|
||||
"username": "user7",
|
||||
"num_followers": 4,
|
||||
"posts": "This is user 7's post!",
|
||||
"friends": [
|
||||
15,
|
||||
13,
|
||||
11,
|
||||
17
|
||||
]
|
||||
},
|
||||
"8": {
|
||||
"user_id": "8",
|
||||
"username": "user8",
|
||||
"num_followers": 2,
|
||||
"posts": "This is user 8's post!",
|
||||
"friends": [
|
||||
4,
|
||||
19
|
||||
]
|
||||
},
|
||||
"9": {
|
||||
"user_id": "9",
|
||||
"username": "user9",
|
||||
"num_followers": 0,
|
||||
"posts": "This is user 9's post!",
|
||||
"friends": [
|
||||
2,
|
||||
10
|
||||
]
|
||||
},
|
||||
"10": {
|
||||
"user_id": "10",
|
||||
"username": "user10",
|
||||
"num_followers": 5,
|
||||
"posts": "This is user 10's post!",
|
||||
"friends": [
|
||||
7,
|
||||
12
|
||||
]
|
||||
},
|
||||
"11": {
|
||||
"user_id": "11",
|
||||
"username": "user11",
|
||||
"num_followers": 1,
|
||||
"posts": "This is user 11's post!",
|
||||
"friends": [
|
||||
12,
|
||||
8,
|
||||
18,
|
||||
4
|
||||
]
|
||||
},
|
||||
"12": {
|
||||
"user_id": "12",
|
||||
"username": "user12",
|
||||
"num_followers": 1,
|
||||
"posts": "This is user 12's post!",
|
||||
"friends": [
|
||||
5,
|
||||
17,
|
||||
8,
|
||||
4
|
||||
]
|
||||
},
|
||||
"13": {
|
||||
"user_id": "13",
|
||||
"username": "user13",
|
||||
"num_followers": 4,
|
||||
"posts": "This is user 13's post!",
|
||||
"friends": [
|
||||
16,
|
||||
10,
|
||||
2,
|
||||
8,
|
||||
1
|
||||
]
|
||||
},
|
||||
"14": {
|
||||
"user_id": "14",
|
||||
"username": "user14",
|
||||
"num_followers": 3,
|
||||
"posts": "This is user 14's post!",
|
||||
"friends": [
|
||||
11
|
||||
]
|
||||
},
|
||||
"15": {
|
||||
"user_id": "15",
|
||||
"username": "user15",
|
||||
"num_followers": 2,
|
||||
"posts": "This is user 15's post!",
|
||||
"friends": [
|
||||
13,
|
||||
14
|
||||
]
|
||||
},
|
||||
"16": {
|
||||
"user_id": "16",
|
||||
"username": "user16",
|
||||
"num_followers": 5,
|
||||
"posts": "This is user 16's post!",
|
||||
"friends": [
|
||||
13,
|
||||
3,
|
||||
14
|
||||
]
|
||||
},
|
||||
"17": {
|
||||
"user_id": "17",
|
||||
"username": "user17",
|
||||
"num_followers": 3,
|
||||
"posts": "This is user 17's post!",
|
||||
"friends": [
|
||||
0,
|
||||
19,
|
||||
2,
|
||||
16
|
||||
]
|
||||
},
|
||||
"18": {
|
||||
"user_id": "18",
|
||||
"username": "user18",
|
||||
"num_followers": 4,
|
||||
"posts": "This is user 18's post!",
|
||||
"friends": [
|
||||
9
|
||||
]
|
||||
},
|
||||
"19": {
|
||||
"user_id": "19",
|
||||
"username": "user19",
|
||||
"num_followers": 1,
|
||||
"posts": "This is user 19's post!",
|
||||
"friends": [
|
||||
1,
|
||||
12,
|
||||
9
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
1
app/database/datastore/llmData_sns.json
Normal file
1
app/database/datastore/llmData_sns.json
Normal file
File diff suppressed because one or more lines are too long
@ -5,6 +5,7 @@ import warnings
|
||||
warnings.filterwarnings('ignore')
|
||||
import re
|
||||
import random
|
||||
import json
|
||||
|
||||
HUGGINGFACEHUB_API_TOKEN = None
|
||||
os.environ["HUGGINGFACEHUB_API_TOKEN"] = HUGGINGFACEHUB_API_TOKEN
|
||||
@ -56,11 +57,20 @@ def generate_data(num_users):
|
||||
huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
|
||||
)
|
||||
llm_chain = prompt | llm
|
||||
data = []
|
||||
data = {}
|
||||
for i in range(num_users):
|
||||
raw_text = llm_chain.invoke({"user_id": i})
|
||||
user_profile = parse_profile(raw_text, i, num_users)
|
||||
if user_profile:
|
||||
data.append(user_profile)
|
||||
data[i] = user_profile
|
||||
|
||||
return data
|
||||
return data
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: generate 100 profiles and dump them to the
    # datastore folder, nested under a top-level "_default" key.
    profiles = generate_data(100)
    output_path = "datastore/llmData_sns.json"

    serialized = json.dumps({"_default": profiles}, indent=4)
    with open(output_path, "w") as out_file:
        out_file.write(serialized)
|
||||
|
Loading…
x
Reference in New Issue
Block a user