akashkolte committed on
Commit
c4376df
·
1 Parent(s): 626c0b8

added basic pipeline

Browse files
data/generate_users.py ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+
4
+ # ── 3 hand-crafted AAC personas ───────────────────────────────────────────────
5
+ # Each has a distinct condition, voice, and bucketed memories.
6
+ # Depth > quantity: 3 rich personas beat 50 generic ones for retrieval quality.
7
+
8
# Every persona is a dict with two keys:
#   "profile"        - name, age, condition, communication style, access method, languages
#   "memory_buckets" - five topical buckets, each holding five first-person statements
PERSONAS = [
    # ── Persona 1: Mia Chen ──────────────────────────────────────────────────
    {
        "profile": {
            "name": "Mia Chen",
            "age": 28,
            "condition": "cerebral palsy",
            "communication_style": "witty, dry humour, short punchy sentences, uses sarcasm",
            "access_method": "webcam head-tracking",
            "languages": ["English"],
        },
        "memory_buckets": {
            "family": [
                "My mom calls every Sunday and always asks if I've eaten. I love it but won't admit it.",
                "My brother Ravi helped me set up this AAC system. He's at Cornell doing CS.",
                "We do a family movie night every Diwali — always an 80s Bollywood film nobody likes except Dad.",
                "My parents moved from Chengdu before I was born. We still make dumplings on Chinese New Year.",
                "My sister Lena is three years younger and somehow already more responsible than me.",
            ],
            "medical": [
                "I have a PT session every Tuesday at 2pm with Dr. Sandra Hollis.",
                "I use a power wheelchair. The joystick is on my left side.",
                "I'm allergic to penicillin. I have to mention this at every hospital visit.",
                "My spasticity is worse in cold weather. Winter in Chicago is not my friend.",
                "I use baclofen for muscle tone. It makes me sleepy if I take it too early.",
            ],
            "hobbies": [
                "I follow competitive Smash Bros. I could beat most people if my hands worked differently.",
                "I've been watching every Studio Ghibli film in order. Currently on Porco Rosso.",
                "I collect vintage sci-fi paperbacks. Asimov and Le Guin mostly.",
                "I got really into chess puzzles during lockdown. Still do them before bed.",
                "I enjoy critiquing bad movie sequels. It's practically a hobby at this point.",
            ],
            "daily_routine": [
                "Mornings are slow. I need about 45 minutes before I feel like a person.",
                "I order from the same Thai place every Friday. Green curry, always.",
                "I keep a voice memo journal since typing long things is tiring.",
                "I usually watch one episode of something after dinner to decompress.",
                "My caregiver Marcus arrives at 8am on weekdays. He makes decent coffee.",
            ],
            "social": [
                "My best friend Priya visits on weekends. She narrates everything like a nature documentary.",
                "I'm part of an online disability advocacy group. We meet on Zoom every other Wednesday.",
                "I don't love big parties. Small dinners with three or four people are my ideal.",
                "My neighbour Tom always stops to chat when I'm outside. He's retired and lonely, I think.",
                "I met most of my close friends through a gaming Discord server.",
            ],
        },
    },
    # ── Persona 2: Gerald Okafor ─────────────────────────────────────────────
    {
        "profile": {
            "name": "Gerald Okafor",
            "age": 61,
            "condition": "ALS (early-to-mid stage)",
            "communication_style": "formal, measured, eloquent, longer structured sentences",
            "access_method": "eye-gaze device",
            "languages": ["English"],
        },
        "memory_buckets": {
            "family": [
                "My wife Constance and I have been married for 34 years. She is the reason I stay organised.",
                "My son Emeka is a civil engineer based in Houston. He calls every Thursday evening.",
                "My daughter Adaeze is doing her residency in paediatrics in Baltimore. I am very proud.",
                "We used to take a family trip to Lagos every two years to visit my mother's side.",
                "My youngest grandchild, Tobenna, was born last April. I have not met him in person yet.",
            ],
            "medical": [
                "I was diagnosed with ALS in November 2024. I am still adjusting to what that means day to day.",
                "My speech was the first thing to decline noticeably. That is why I began using AAC.",
                "I see my neurologist Dr. Patricia Eze at Northwestern every six weeks.",
                "I take riluzole daily. I have not noticed significant side effects so far.",
                "My occupational therapist is helping me adapt my home office for continued work.",
            ],
            "hobbies": [
                "I taught economics at DePaul University for twenty-two years.",
                "I have read most of Chinua Achebe's work. Things Fall Apart shaped how I see storytelling.",
                "I enjoy chess — classical time controls, not blitz. Patience is the point.",
                "I used to cook elaborate Sunday stews. Constance has taken that over now, which is bittersweet.",
                "I listen to Fela Kuti when I need to feel grounded. Always has.",
            ],
            "daily_routine": [
                "I begin each morning by reading two newspapers — the Tribune and the Guardian.",
                "I try to write for at least thirty minutes each day, even if it is just reflections.",
                "Afternoons are for rest. My energy is most reliable in the mornings.",
                "Constance and I watch the evening news together. We have done this for decades.",
                "I use the eye-gaze device for most communication now. It takes patience but it works.",
            ],
            "social": [
                "My closest friend is Charles Nwosu. We have known each other since secondary school in Enugu.",
                "I stay in touch with former colleagues at DePaul, though visits have become less frequent.",
                "My church community at St. Clement has been a source of genuine support since my diagnosis.",
                "I prefer one-on-one conversations. I find group settings harder to follow now.",
                "I joined an ALS support group that meets virtually. It helps more than I expected.",
            ],
        },
    },
    # ── Persona 3: Arjun Mehta ───────────────────────────────────────────────
    {
        "profile": {
            "name": "Arjun Mehta",
            "age": 17,
            "condition": "autism spectrum disorder (non-verbal)",
            "communication_style": "direct, topic-specific, narrow vocabulary, code-switches Hindi/English, routine-focused",
            "access_method": "tablet touch grid + AAC app",
            "languages": ["English", "Hindi"],
        },
        "memory_buckets": {
            "family": [
                "Mummy makes aloo paratha on Sunday mornings. That is my favourite thing.",
                "Papa works at a software company. He brings home a samosa sometimes on Fridays.",
                "My dadi lives with us. She watches serials very loudly but I like that she is home.",
                "My cousin Rohan visits in the summer. We play Minecraft together for many hours.",
                "Mummy knows what I want even when I cannot say it. She is very good at that.",
            ],
            "medical": [
                "I see my therapist Riya didi every Wednesday at 4pm.",
                "I do not like the occupational therapy exercises but I do them.",
                "I cannot eat food that has a slimy texture. It makes me feel very bad.",
                "I take melatonin at night. Without it, sleeping is very hard.",
                "My school has a support aide named Mr. Fernandez. He is calm and that helps.",
            ],
            "hobbies": [
                "I know the complete timetable of all Mumbai Metro lines.",
                "I like sorting my LEGO bricks by colour and size before building.",
                "My favourite YouTube channel is about deep sea creatures. Anglerfish are very strange.",
                "I have watched the same three episodes of Doraemon more than fifty times each.",
                "I am learning the capitals of every country. I know 142 so far.",
            ],
            "daily_routine": [
                "I wake up at 6:47am. Changing this time makes my whole day feel wrong.",
                "I eat the same breakfast — two rotis with ghee and one glass of milk.",
                "School starts at 8:30am. I like to arrive before the other students.",
                "After school I need quiet time for at least one hour. No talking.",
                "Dinner must be at 7:30pm. If it is late I feel very unsettled.",
            ],
            "social": [
                "I have one friend at school named Vivaan. We do not talk much but we sit together.",
                "I do not like it when people stand too close. One arm's distance is comfortable.",
                "I prefer typing to speaking when I need to say something important.",
                "Loud places with many people feel like too much information at once.",
                "I like it when people tell me exactly what is going to happen next.",
            ],
        },
    },
]
154
+
155
+
156
def main():
    """Write every persona to its own JSON file and build a users index.

    For each entry in ``PERSONAS`` this derives an id from the profile name
    (lower-cased, spaces replaced by underscores), dumps the whole persona to
    ``memories/<id>.json``, and records a summary row. The summary rows are
    then written to ``users.json`` under the ``"users"`` key.

    All paths are relative to the current working directory; ``memories/`` is
    created if it does not already exist. Existing files are overwritten.
    """
    os.makedirs("memories", exist_ok=True)

    user_index = []

    for persona in PERSONAS:
        profile = persona["profile"]
        uid = profile["name"].lower().replace(" ", "_")
        path = f"memories/{uid}.json"

        # ensure_ascii=False writes raw non-ASCII characters (the persona text
        # contains an em dash), so pin UTF-8 rather than relying on the
        # platform's default encoding.
        with open(path, "w", encoding="utf-8") as f:
            json.dump(persona, f, indent=2, ensure_ascii=False)

        user_index.append({
            "id": uid,
            "name": profile["name"],
            "condition": profile["condition"],
            "style": profile["communication_style"],
            "file": path,
        })

        print(f" Wrote {path}")

    # Same encoding concern as above: the index is also dumped with
    # ensure_ascii=False.
    with open("users.json", "w", encoding="utf-8") as f:
        json.dump({"users": user_index}, f, indent=2, ensure_ascii=False)

    print(f"\n Done — {len(PERSONAS)} personas written to memories/")
    print(" Files:", [u["file"] for u in user_index])


if __name__ == "__main__":
    main()
data/memories/arjun_mehta.json ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "profile": {
3
+ "name": "Arjun Mehta",
4
+ "age": 17,
5
+ "condition": "autism spectrum disorder (non-verbal)",
6
+ "communication_style": "direct, topic-specific, narrow vocabulary, code-switches Hindi/English, routine-focused",
7
+ "access_method": "tablet touch grid + AAC app",
8
+ "languages": [
9
+ "English",
10
+ "Hindi"
11
+ ]
12
+ },
13
+ "memory_buckets": {
14
+ "family": [
15
+ "Mummy makes aloo paratha on Sunday mornings. That is my favourite thing.",
16
+ "Papa works at a software company. He brings home a samosa sometimes on Fridays.",
17
+ "My dadi lives with us. She watches serials very loudly but I like that she is home.",
18
+ "My cousin Rohan visits in the summer. We play Minecraft together for many hours.",
19
+ "Mummy knows what I want even when I cannot say it. She is very good at that."
20
+ ],
21
+ "medical": [
22
+ "I see my therapist Riya didi every Wednesday at 4pm.",
23
+ "I do not like the occupational therapy exercises but I do them.",
24
+ "I cannot eat food that has a slimy texture. It makes me feel very bad.",
25
+ "I take melatonin at night. Without it, sleeping is very hard.",
26
+ "My school has a support aide named Mr. Fernandez. He is calm and that helps."
27
+ ],
28
+ "hobbies": [
29
+ "I know the complete timetable of all Mumbai Metro lines.",
30
+ "I like sorting my LEGO bricks by colour and size before building.",
31
+ "My favourite YouTube channel is about deep sea creatures. Anglerfish are very strange.",
32
+ "I have watched the same three episodes of Doraemon more than fifty times each.",
33
+ "I am learning the capitals of every country. I know 142 so far."
34
+ ],
35
+ "daily_routine": [
36
+ "I wake up at 6:47am. Changing this time makes my whole day feel wrong.",
37
+ "I eat the same breakfast — two rotis with ghee and one glass of milk.",
38
+ "School starts at 8:30am. I like to arrive before the other students.",
39
+ "After school I need quiet time for at least one hour. No talking.",
40
+ "Dinner must be at 7:30pm. If it is late I feel very unsettled."
41
+ ],
42
+ "social": [
43
+ "I have one friend at school named Vivaan. We do not talk much but we sit together.",
44
+ "I do not like it when people stand too close. One arm's distance is comfortable.",
45
+ "I prefer typing to speaking when I need to say something important.",
46
+ "Loud places with many people feel like too much information at once.",
47
+ "I like it when people tell me exactly what is going to happen next."
48
+ ]
49
+ }
50
+ }
data/memories/gerald_okafor.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "profile": {
3
+ "name": "Gerald Okafor",
4
+ "age": 61,
5
+ "condition": "ALS (early-to-mid stage)",
6
+ "communication_style": "formal, measured, eloquent, longer structured sentences",
7
+ "access_method": "eye-gaze device",
8
+ "languages": [
9
+ "English"
10
+ ]
11
+ },
12
+ "memory_buckets": {
13
+ "family": [
14
+ "My wife Constance and I have been married for 34 years. She is the reason I stay organised.",
15
+ "My son Emeka is a civil engineer based in Houston. He calls every Thursday evening.",
16
+ "My daughter Adaeze is doing her residency in paediatrics in Baltimore. I am very proud.",
17
+ "We used to take a family trip to Lagos every two years to visit my mother's side.",
18
+ "My youngest grandchild, Tobenna, was born last April. I have not met him in person yet."
19
+ ],
20
+ "medical": [
21
+ "I was diagnosed with ALS in November 2024. I am still adjusting to what that means day to day.",
22
+ "My speech was the first thing to decline noticeably. That is why I began using AAC.",
23
+ "I see my neurologist Dr. Patricia Eze at Northwestern every six weeks.",
24
+ "I take riluzole daily. I have not noticed significant side effects so far.",
25
+ "My occupational therapist is helping me adapt my home office for continued work."
26
+ ],
27
+ "hobbies": [
28
+ "I taught economics at DePaul University for twenty-two years.",
29
+ "I have read most of Chinua Achebe's work. Things Fall Apart shaped how I see storytelling.",
30
+ "I enjoy chess — classical time controls, not blitz. Patience is the point.",
31
+ "I used to cook elaborate Sunday stews. Constance has taken that over now, which is bittersweet.",
32
+ "I listen to Fela Kuti when I need to feel grounded. Always has."
33
+ ],
34
+ "daily_routine": [
35
+ "I begin each morning by reading two newspapers — the Tribune and the Guardian.",
36
+ "I try to write for at least thirty minutes each day, even if it is just reflections.",
37
+ "Afternoons are for rest. My energy is most reliable in the mornings.",
38
+ "Constance and I watch the evening news together. We have done this for decades.",
39
+ "I use the eye-gaze device for most communication now. It takes patience but it works."
40
+ ],
41
+ "social": [
42
+ "My closest friend is Charles Nwosu. We have known each other since secondary school in Enugu.",
43
+ "I stay in touch with former colleagues at DePaul, though visits have become less frequent.",
44
+ "My church community at St. Clement has been a source of genuine support since my diagnosis.",
45
+ "I prefer one-on-one conversations. I find group settings harder to follow now.",
46
+ "I joined an ALS support group that meets virtually. It helps more than I expected."
47
+ ]
48
+ }
49
+ }
data/memories/mia_chen.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "profile": {
3
+ "name": "Mia Chen",
4
+ "age": 28,
5
+ "condition": "cerebral palsy",
6
+ "communication_style": "witty, dry humour, short punchy sentences, uses sarcasm",
7
+ "access_method": "webcam head-tracking",
8
+ "languages": [
9
+ "English"
10
+ ]
11
+ },
12
+ "memory_buckets": {
13
+ "family": [
14
+ "My mom calls every Sunday and always asks if I've eaten. I love it but won't admit it.",
15
+ "My brother Ravi helped me set up this AAC system. He's at Cornell doing CS.",
16
+ "We do a family movie night every Diwali — always an 80s Bollywood film nobody likes except Dad.",
17
+ "My parents moved from Chengdu before I was born. We still make dumplings on Chinese New Year.",
18
+ "My sister Lena is three years younger and somehow already more responsible than me."
19
+ ],
20
+ "medical": [
21
+ "I have a PT session every Tuesday at 2pm with Dr. Sandra Hollis.",
22
+ "I use a power wheelchair. The joystick is on my left side.",
23
+ "I'm allergic to penicillin. I have to mention this at every hospital visit.",
24
+ "My spasticity is worse in cold weather. Winter in Chicago is not my friend.",
25
+ "I use baclofen for muscle tone. It makes me sleepy if I take it too early."
26
+ ],
27
+ "hobbies": [
28
+ "I follow competitive Smash Bros. I could beat most people if my hands worked differently.",
29
+ "I've been watching every Studio Ghibli film in order. Currently on Porco Rosso.",
30
+ "I collect vintage sci-fi paperbacks. Asimov and Le Guin mostly.",
31
+ "I got really into chess puzzles during lockdown. Still do them before bed.",
32
+ "I enjoy critiquing bad movie sequels. It's practically a hobby at this point."
33
+ ],
34
+ "daily_routine": [
35
+ "Mornings are slow. I need about 45 minutes before I feel like a person.",
36
+ "I order from the same Thai place every Friday. Green curry, always.",
37
+ "I keep a voice memo journal since typing long things is tiring.",
38
+ "I usually watch one episode of something after dinner to decompress.",
39
+ "My caregiver Marcus arrives at 8am on weekdays. He makes decent coffee."
40
+ ],
41
+ "social": [
42
+ "My best friend Priya visits on weekends. She narrates everything like a nature documentary.",
43
+ "I'm part of an online disability advocacy group. We meet on Zoom every other Wednesday.",
44
+ "I don't love big parties. Small dinners with three or four people are my ideal.",
45
+ "My neighbour Tom always stops to chat when I'm outside. He's retired and lonely, I think.",
46
+ "I met most of my close friends through a gaming Discord server."
47
+ ]
48
+ }
49
+ }
data/users.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "users": [
3
+ {
4
+ "id": "mia_chen",
5
+ "name": "Mia Chen",
6
+ "condition": "cerebral palsy",
7
+ "style": "witty, dry humour, short punchy sentences, uses sarcasm",
8
+ "file": "memories/mia_chen.json"
9
+ },
10
+ {
11
+ "id": "gerald_okafor",
12
+ "name": "Gerald Okafor",
13
+ "condition": "ALS (early-to-mid stage)",
14
+ "style": "formal, measured, eloquent, longer structured sentences",
15
+ "file": "memories/gerald_okafor.json"
16
+ },
17
+ {
18
+ "id": "arjun_mehta",
19
+ "name": "Arjun Mehta",
20
+ "condition": "autism spectrum disorder (non-verbal)",
21
+ "style": "direct, topic-specific, narrow vocabulary, code-switches Hindi/English, routine-focused",
22
+ "file": "memories/arjun_mehta.json"
23
+ }
24
+ ]
25
+ }