Karthik8nitt committed on
Commit
2d17d7b
·
verified ·
1 Parent(s): 56d302d

Add synthetic dataset generator matching user's schema

Browse files
Files changed (1) hide show
  1. generate_synthetic_dataset.py +432 -0
generate_synthetic_dataset.py ADDED
@@ -0,0 +1,432 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Synthetic floorplan dataset generator for Indian residential construction.
3
+ Produces (parametric_input, floorplan_output) pairs matching the user's ProjectCreate schema.
4
+ """
5
+ import json, random, math
6
+ from dataclasses import dataclass
7
+ from typing import List, Dict, Any, Optional, Tuple
8
+ from datasets import Dataset, DatasetDict
9
+
10
# Size envelope per room type: (type, min_w, max_w, min_d, max_d, area_range).
# Units are presumably metres / square metres (the generated prompts print "m")
# -- TODO confirm against the consuming schema.
_ROOM_SIZE_ROWS = (
    ("bedroom", 2.7, 4.2, 3.0, 4.8, (10, 20)),
    ("master_bedroom", 3.3, 4.5, 3.6, 5.4, (14, 25)),
    ("toilet", 1.2, 1.8, 1.8, 2.7, (2.5, 5)),
    ("kitchen", 2.4, 3.6, 2.7, 4.2, (7, 15)),
    ("living", 3.3, 5.4, 3.6, 6.0, (14, 30)),
    ("dining", 2.7, 3.6, 3.0, 4.5, (9, 16)),
    ("pooja", 1.2, 1.8, 1.5, 2.1, (1.8, 4)),
    ("study", 2.1, 3.0, 2.4, 3.6, (5, 11)),
    ("balcony", 1.2, 2.4, 2.4, 4.5, (3, 10)),
    ("parking", 2.5, 3.3, 5.0, 6.0, (12, 20)),
    ("staircase", 2.7, 3.3, 3.0, 4.5, (8, 15)),
    ("corridor", 1.0, 1.5, 2.4, 6.0, (2.5, 9)),
    ("utility", 1.5, 2.4, 1.8, 3.0, (3, 7)),
    ("store", 1.2, 2.1, 1.5, 2.4, (2, 5)),
)

# Same mapping as a dict keyed by room type, in the order listed above.
ROOM_SIZES = {
    room_type: {
        "min_w": min_w,
        "max_w": max_w,
        "min_d": min_d,
        "max_d": max_d,
        "area_range": area_range,
    }
    for room_type, min_w, max_w, min_d, max_d, area_range in _ROOM_SIZE_ROWS
}

# Plot outlines understood by make_plot_boundary.
PLOT_SHAPES = ["rectangular", "l_shaped", "trapezoid"]

# Compass sides sampled for both the road side and the north direction.
DIRECTIONS = ["N", "S", "E", "W"]

CITIES = [
    "Delhi",
    "Mumbai",
    "Bangalore",
    "Chennai",
    "Hyderabad",
    "Pune",
    "Kolkata",
    "Ahmedabad",
    "Jaipur",
    "other",
]

# None is a valid choice; the prompt prints it as 'N/A'.
MUNICIPALITIES = [
    "MC",
    "MDA",
    "PMA",
    "BDA",
    "GHMC",
    "BBMP",
    "MCD",
    "KMC",
    "JDA",
    None,
]
31
+
32
def rect_polygon(x, y, w, d):
    """Return the corners of an axis-aligned rectangle as ``[x, y]`` pairs.

    The rectangle starts at ``(x, y)`` and extends ``w`` along x and ``d``
    along y. Corners are listed as (x, y), (x+w, y), (x+w, y+d), (x, y+d),
    each coordinate rounded to 2 decimals.
    """
    far_x = x + w
    far_y = y + d
    corners = ((x, y), (far_x, y), (far_x, far_y), (x, far_y))
    return [[round(cx, 2), round(cy, 2)] for cx, cy in corners]
34
+
35
def polygon_area(poly):
    """Return the unsigned area of a polygon given as a sequence of (x, y) points.

    Uses the shoelace formula over consecutive vertex pairs, wrapping from the
    last vertex back to the first. An empty sequence yields 0.0.
    """
    twice_signed = 0.0
    # Pair every vertex with its successor; the rotation closes the ring.
    successors = poly[1:] + poly[:1]
    for (ax, ay), (bx, by) in zip(poly, successors):
        twice_signed += ax * by - bx * ay
    return abs(twice_signed) / 2.0
43
+
44
def polygon_bbox(poly):
    """Return ``(min_x, min_y, max_x, max_y)`` over the points of ``poly``.

    Only the first two components of each point are read.
    """
    x_values = []
    y_values = []
    for point in poly:
        x_values.append(point[0])
        y_values.append(point[1])
    lowest_x, lowest_y = min(x_values), min(y_values)
    highest_x, highest_y = max(x_values), max(y_values)
    return lowest_x, lowest_y, highest_x, highest_y
47
+
48
def make_plot_boundary(params):
    """Build the outer plot polygon described by ``params``.

    The plot spans ``plot_length`` along x and ``plot_width`` along y, with the
    front edge on y = 0.

    Recognised ``plot_shape`` values:
      * ``"rectangular"`` (default): a plot_length x plot_width rectangle.
      * ``"l_shaped"``: the rectangle with a ``cutout_width`` x ``cutout_height``
        notch removed at ``cutout_corner`` ("NE", "NW", "SE" or "SW"). An
        unknown corner falls back to the plain rectangle.
      * ``"trapezoid"``: front edge of ``plot_front_width`` on y = 0 and rear
        edge of ``plot_rear_width`` on y = plot_width, with the rear edge
        shifted left by ``plot_side_offset``.

    Any other shape yields the plain rectangle. Optional keys that are absent
    or explicitly ``None`` use their defaults. Every vertex is rounded to 2
    decimals, matching ``rect_polygon``.

    Returns:
        A list of ``[x, y]`` vertices.
    """

    def option(key, default):
        # generate_example stores None for parameters that do not apply to the
        # chosen shape; treat that the same as a missing key.
        value = params.get(key)
        return default if value is None else value

    def rounded(points):
        return [[round(px, 2), round(py, 2)] for px, py in points]

    shape = option("plot_shape", "rectangular")
    L, W = params["plot_length"], params["plot_width"]

    if shape == "l_shaped":
        cw = option("cutout_width", L * 0.3)
        ch = option("cutout_height", W * 0.3)
        corner = option("cutout_corner", "NE")
        outlines = {
            "NE": [[0, 0], [L, 0], [L, W - ch], [L - cw, W - ch], [L - cw, W], [0, W]],
            "NW": [[0, 0], [L, 0], [L, W], [cw, W], [cw, W - ch], [0, W - ch]],
            "SE": [[0, 0], [L - cw, 0], [L - cw, ch], [L, ch], [L, W], [0, W]],
            "SW": [[cw, 0], [L, 0], [L, W], [0, W], [0, ch], [cw, ch]],
        }
        if corner in outlines:
            return rounded(outlines[corner])
    elif shape == "trapezoid":
        fw = option("plot_front_width", L)
        rw = option("plot_rear_width", L * 0.8)
        off = option("plot_side_offset", 0.0)
        # The rear edge runs from -off to rw - off so that its length is the
        # requested rear width (previously the front width was reused and
        # plot_rear_width had no effect). "0.0 - off" avoids emitting -0.0.
        return rounded([[0, 0], [fw, 0], [rw - off, W], [0.0 - off, W]])

    return rect_polygon(0, 0, L, W)
72
+
73
def make_buildable_boundary(plot_poly, params):
    """Return the rectangular buildable envelope inside ``plot_poly``.

    The envelope is the plot's bounding box shrunk by the setbacks:
    ``setback_left`` / ``setback_right`` along x and ``setback_front`` /
    ``setback_rear`` along y (front at the low-y edge). Missing or ``None``
    setbacks default to 1.5 (front) and 1.0 (rear, left, right).

    NOTE: non-rectangular plots are approximated by their bounding box, so for
    an L-shaped or trapezoidal plot the envelope can extend past the true plot
    outline.

    Width and depth are clamped at zero so oversized setbacks produce a
    degenerate rectangle rather than one with negative extents.
    """

    def setback(key, default):
        value = params.get(key)
        return default if value is None else value

    sf = setback("setback_front", 1.5)
    sr = setback("setback_rear", 1.0)
    sl = setback("setback_left", 1.0)
    srt = setback("setback_right", 1.0)

    minx, miny, maxx, maxy = polygon_bbox(plot_poly)
    width = max(0.0, maxx - minx - sl - srt)
    depth = max(0.0, maxy - miny - sf - sr)
    return rect_polygon(minx + sl, miny + sf, width, depth)
82
+
83
def distribute_width(total, n, min_bay=2.4):
    """Split ``total`` into ``n`` bay widths that sum to ``total``.

    Each bay except the last takes ``total / n`` rounded to 1 decimal (but at
    least ``min_bay``), capped so that every later bay can still receive
    ``min_bay``. The last bay absorbs the remainder.

    Args:
        total: Width to distribute.
        n: Number of bays.
        min_bay: Minimum width per bay when ``total`` allows it.

    Returns:
        A list of ``n`` widths rounded to 2 decimals. ``[]`` when ``n <= 0``;
        all zeros when ``total <= 0``; an equal split when ``total`` is too
        small to give every bay ``min_bay`` (the minimum cannot be honoured,
        and the capping rule would otherwise produce negative bays).
    """
    if n <= 0:
        return []
    if total <= 0:
        return [0.0] * n
    if total < min_bay * n:
        share = round(total / n, 2)
        return [share] * (n - 1) + [round(total - share * (n - 1), 2)]

    base = total / n
    bays = []
    remaining = total
    for i in range(n):
        if i == n - 1:
            bays.append(round(remaining, 2))
            break
        bay = max(min_bay, round(base, 1))
        # Leave at least min_bay for each bay still to come.
        bay = round(min(bay, remaining - min_bay * (n - i - 1)), 2)
        bays.append(bay)
        remaining -= bay
    return bays
96
+
97
def distribute_depth(total, n, min_row=2.4):
    """Split ``total`` into ``n`` row depths that sum to ``total``.

    Mirrors ``distribute_width``: each row except the last takes ``total / n``
    rounded to 1 decimal (but at least ``min_row``), capped so that every later
    row can still receive ``min_row``. The last row absorbs the remainder.

    Args:
        total: Depth to distribute.
        n: Number of rows.
        min_row: Minimum depth per row when ``total`` allows it.

    Returns:
        A list of ``n`` depths rounded to 2 decimals. ``[]`` when ``n <= 0``;
        all zeros when ``total <= 0``; an equal split when ``total`` is too
        small to give every row ``min_row`` (the capping rule would otherwise
        produce negative rows).
    """
    if n <= 0:
        return []
    if total <= 0:
        return [0.0] * n
    if total < min_row * n:
        share = round(total / n, 2)
        return [share] * (n - 1) + [round(total - share * (n - 1), 2)]

    base = total / n
    rows = []
    remaining = total
    for i in range(n):
        if i == n - 1:
            rows.append(round(remaining, 2))
            break
        depth = max(min_row, round(base, 1))
        # Leave at least min_row for each row still to come.
        depth = round(min(depth, remaining - min_row * (n - i - 1)), 2)
        rows.append(depth)
        remaining -= depth
    return rows
110
+
111
def generate_room_specs(params):
    """Expand project parameters into a flat list of room specifications.

    Each spec is a dict with keys ``id``, ``type``, ``name``, ``zone``
    ("front" / "mid" / "back" / "side"), ``floor`` ("gf", "ff", "sf", "stilt"
    or "basement") and ``target_area``.

    Rooms produced:
      * Always on the ground floor: living room, kitchen, dining area, a common
        toilet and a staircase.
      * Exactly ``num_bedrooms`` bedrooms (the first is the master bedroom) and
        ``toilets`` numbered toilets. With more than one floor, the last
        ``num_floors - 1`` of each go to the first floor, the rest to the
        ground floor.
      * Optional pooja room, one study (first floor when there is one, else
        ground floor) and a ground-floor balcony.
      * Parking plus a staircase on the "stilt" floor when ``has_stilt`` or
        ``parking`` is set; storage plus a staircase in the basement when
        ``has_basement`` is set.
      * A lounge, common toilet and optional balcony on each upper floor.
      * One "mid"-zone room per ``custom_room_config`` entry.

    Upper floors no longer receive a second copy of the bedrooms and study: the
    earlier duplication made the plan contain more bedrooms than requested,
    with repeated or zero-numbered names.
    """
    specs = []
    num_bed = params["num_bedrooms"]
    num_toi = params["toilets"]
    has_pooja = params.get("has_pooja", False)
    has_study = params.get("has_study", False)
    has_balc = params.get("has_balcony", False)
    parking = params.get("parking", False)
    num_floors = params.get("num_floors", 1)
    has_stilt = params.get("has_stilt", False)
    has_basement = params.get("has_basement", False)
    custom = params.get("custom_room_config", None) or []

    def add(rtype, rid, name, zone, floor="gf", target_area=None):
        specs.append({
            "id": rid, "type": rtype, "name": name,
            "zone": zone, "floor": floor, "target_area": target_area,
        })

    def floor_for(index, count):
        # The last (num_floors - 1) rooms of a numbered series go upstairs.
        if num_floors > 1 and index >= count - (num_floors - 1):
            return "ff"
        return "gf"

    add("living", "living_1", "Living Room", "front", "gf", 18)
    add("kitchen", "kitchen_1", "Kitchen", "mid", "gf", 9)
    add("dining", "dining_1", "Dining Area", "front", "gf", 12)
    add("toilet", "toilet_common", "Common Toilet", "back", "gf", 3.5)

    for i in range(num_bed):
        floor = floor_for(i, num_bed)
        if i == 0:
            add("master_bedroom", f"bedroom_{i+1}", "Master Bedroom", "back", floor, 16)
        else:
            add("bedroom", f"bedroom_{i+1}", f"Bedroom {i+1}", "back", floor, 12)

    for i in range(num_toi):
        add("toilet", f"toilet_{i+1}", f"Toilet {i+1}", "back", floor_for(i, num_toi), 3.5)

    if has_pooja:
        add("pooja", "pooja_1", "Pooja Room", "back", "gf", 2.5)
    if has_study:
        add("study", "study_1", "Study Room", "back", "ff" if num_floors > 1 else "gf", 8)
    if has_balc:
        add("balcony", "balcony_1", "Balcony", "side", "gf", 5)
    add("staircase", "stairs_1", "Staircase", "mid", "gf", 10)

    if has_stilt or parking:
        add("parking", "parking_1", "Parking", "side", "stilt", 15)
        add("staircase", "stairs_stilt", "Staircase (Stilt)", "mid", "stilt", 10)

    if has_basement:
        add("store", "store_base", "Storage", "back", "basement", 8)
        add("staircase", "stairs_base", "Staircase (Basement)", "mid", "basement", 10)

    if num_floors >= 2:
        add("living", "living_ff", "Family Lounge", "front", "ff", 14)
        add("toilet", "toilet_ff", "Common Toilet (FF)", "back", "ff", 3.5)
        if has_balc:
            add("balcony", "balcony_ff", "Balcony (FF)", "side", "ff", 5)

    if num_floors >= 3:
        add("living", "living_sf", "Terrace Lounge", "front", "sf", 12)
        add("toilet", "toilet_sf", "Common Toilet (SF)", "back", "sf", 3.5)
        if has_balc:
            add("balcony", "balcony_sf", "Balcony (SF)", "side", "sf", 5)

    floor_map = {
        "basement": "basement", "stilt": "stilt",
        "gf": "gf", "ff": "ff", "sf": "sf", "either": "gf",
    }
    for i, cr in enumerate(custom):
        # A type given as None is treated like a missing one.
        rtype = (cr.get("type") or "room").lower().replace(" ", "_")
        floor = floor_map.get(cr.get("floor_preference", "either"), "gf")
        min_a = cr.get("min_area_sqm", 10)
        add(rtype, f"custom_{i+1}", cr.get("name", f"Custom Room {i+1}"), "mid", floor, min_a)

    return specs
184
+
185
def place_rooms(buildable_poly, rooms_spec, vastu, road_side, north_dir):
    """Lay out room specs as rectangles inside the buildable envelope.

    The bounding box of ``buildable_poly`` is cut into 2-4 vertical bays and
    three horizontal bands measured from its low-y edge: front (35% of the
    depth: living, dining), mid (30%: kitchen, utility, staircase, corridor,
    store) and back (35%: bedrooms, toilets, pooja, study). A room type not
    listed in any band is placed by its spec ``zone`` ("front" / "mid" /
    "back"), defaulting to the mid band.

    Each floor is laid out independently from the same origin, so rooms on
    different floors may share coordinates. When a band holds more rooms than
    there are bays, the extra rooms wrap into further sub-rows that split the
    band depth equally. Previously every floor shared one row per band and
    rooms beyond the bay count were dropped.

    A room's depth is ``target_area / bay width`` clamped to
    ``[2.4, sub-row depth]``. A missing or ``None`` target fills the sub-row.

    Balconies are attached to a room on the same floor, on the side selected by
    ``road_side``; parking is a rectangle at the envelope origin.

    Args:
        buildable_poly: Polygon whose bounding box is the buildable area.
        rooms_spec: Room dicts as produced by ``generate_room_specs``.
        vastu: Currently unused.
        road_side: "N", "S", "E" or "W"; only affects balcony placement.
        north_dir: Currently unused.

    Returns:
        A list of placed-room dicts with keys ``id``, ``type``, ``name``,
        ``floor``, ``polygon``, ``area_sqm``, ``dimensions`` and ``position``
        (the polygon centre).
    """
    minx, miny, maxx, maxy = polygon_bbox(buildable_poly)
    bw, bd = maxx - minx, maxy - miny
    num_bays = max(2, min(4, int(bw / 3.0)))
    bay_widths = distribute_width(bw, num_bays)

    side_types = ("balcony", "parking")
    # (zone, member room types, fraction of the buildable depth)
    bands = (
        ("front", ("living", "dining"), 0.35),
        ("mid", ("kitchen", "utility", "staircase", "corridor", "store"), 0.3),
        ("back", ("bedroom", "master_bedroom", "toilet", "pooja", "study"), 0.35),
    )
    zone_of_type = {rtype: zone for zone, rtypes, _ in bands for rtype in rtypes}

    def zone_for(spec):
        zone = zone_of_type.get(spec["type"])
        if zone is None:
            # Unlisted (e.g. custom) types: trust the spec's zone, else "mid".
            zone = spec.get("zone")
            if zone not in ("front", "mid", "back"):
                zone = "mid"
        return zone

    def record(spec, name, floor, poly, width, depth, cx, cy):
        return {
            "id": spec["id"], "type": spec["type"], "name": name, "floor": floor,
            "polygon": poly, "area_sqm": round(polygon_area(poly), 2),
            "dimensions": {"width": round(width, 2), "depth": round(depth, 2)},
            "position": {"x": round(cx, 2), "y": round(cy, 2)},
        }

    def place_band(members, band_depth, y_start):
        if not members:
            return []
        # Wrap overflow into extra sub-rows instead of dropping rooms.
        chunks = [members[k:k + num_bays] for k in range(0, len(members), num_bays)]
        sub_depth = band_depth / len(chunks)
        out = []
        for row_index, chunk in enumerate(chunks):
            y_row = y_start + row_index * sub_depth
            x_cursor = minx
            for spec, w in zip(chunk, bay_widths):
                target = spec.get("target_area")
                if target is None:
                    target = w * sub_depth
                d = sub_depth
                if target > 0 and w > 0:
                    d = round(min(max(target / w, 2.4), sub_depth), 2)
                poly = rect_polygon(round(x_cursor, 2), round(y_row, 2), round(w, 2), round(d, 2))
                out.append(record(
                    spec, spec["name"], spec.get("floor", "gf"), poly,
                    w, d, x_cursor + w / 2, y_row + d / 2,
                ))
                x_cursor += w
        return out

    # Floors in order of first appearance so output order stays stable.
    floors = []
    for spec in rooms_spec:
        floor = spec.get("floor", "gf")
        if floor not in floors:
            floors.append(floor)

    placed = []
    for floor in floors:
        on_floor = [
            s for s in rooms_spec
            if s.get("floor", "gf") == floor and s["type"] not in side_types
        ]
        y_cursor = miny
        for zone, _, fraction in bands:
            band_depth = bd * fraction
            members = [s for s in on_floor if zone_for(s) == zone]
            placed.extend(place_band(members, band_depth, y_cursor))
            y_cursor += band_depth

    # Balconies anchor to band rooms only, never to other side rooms.
    anchors = list(placed)
    for spec in rooms_spec:
        if spec["type"] == "balcony":
            floor = spec.get("floor", "gf")
            candidates = [r for r in anchors if r["floor"] == floor] or anchors
            if not candidates:
                continue
            ref = candidates[0] if road_side in ("N", "W") else candidates[-1]
            rx0, ry0, rx1, ry1 = polygon_bbox(ref["polygon"])
            bal_w = min(2.0, bw * 0.15)
            if road_side == "N":
                poly = rect_polygon(rx0, ry1, rx1 - rx0, bal_w)
            elif road_side == "S":
                poly = rect_polygon(rx0, ry0 - bal_w, rx1 - rx0, bal_w)
            elif road_side == "E":
                poly = rect_polygon(rx1, ry0, bal_w, ry1 - ry0)
            else:
                poly = rect_polygon(rx0 - bal_w, ry0, bal_w, ry1 - ry0)
            # Describe the balcony itself (x-extent, y-extent, own centre),
            # not the room it is attached to.
            bx0, by0, bx1, by1 = polygon_bbox(poly)
            placed.append(record(
                spec, spec.get("name", "Balcony"), floor, poly,
                bx1 - bx0, by1 - by0, (bx0 + bx1) / 2, (by0 + by1) / 2,
            ))
        elif spec["type"] == "parking":
            park_w = min(bw, 3.0 * 2.5)
            park_d = min(bd * 0.25, 6.0)
            poly = rect_polygon(minx, miny, park_w, park_d)
            placed.append(record(
                spec, spec.get("name", "Parking"), spec.get("floor", "stilt"), poly,
                park_w, park_d, minx + park_w / 2, miny + park_d / 2,
            ))
    return placed
255
+
256
def generate_openings(rooms, road_side):
    """Generate doors and windows for a list of placed rooms.

    Doors:
      * One main entrance (0.9 wide) on the first ground-floor living room, on
        the wall selected by ``road_side``.
      * One internal door (0.75 wide) for every pair of rooms on the same floor
        whose facing walls lie within 0.3 of each other and overlap by at least
        the door width. Rooms that only touch at a corner get no door, and
        adjacency is tested in both directions.

    Windows: one per living / bedroom / master bedroom / dining / kitchen /
    study room. A room at least as wide as deep gets its window on a vertical
    (left or right) wall, otherwise on a horizontal wall. Of the two candidate
    walls, the one on the room's side of the floor's overall centre is used;
    on a tie the right / top wall is used. Previously the two candidate walls
    were compared by distance to the room's own centre, which is always a tie.

    Args:
        rooms: Placed-room dicts with ``id``, ``type``, ``floor``, ``polygon``.
        road_side: "N", "S", "E" or "W".

    Returns:
        ``(doors, windows)`` as two lists of dicts.
    """
    doors, windows = [], []
    boxes = [polygon_bbox(r["polygon"]) for r in rooms]

    entrance = [r for r in rooms if r["type"] == "living" and r["floor"] == "gf"]
    if entrance:
        lr = entrance[0]
        mx, my, Mx, My = polygon_bbox(lr["polygon"])
        if road_side in ("N", "S"):
            x = round((mx + Mx) / 2 - 0.45, 2)
            y = My if road_side == "N" else my
            doors.append({"id": "door_main", "type": "main_entrance", "width": 0.9, "from": "outside", "to": lr["id"], "position": [x, y], "orientation": "horizontal"})
        else:
            x = Mx if road_side == "E" else mx
            y = round((my + My) / 2 - 0.45, 2)
            doors.append({"id": "door_main", "type": "main_entrance", "width": 0.9, "from": "outside", "to": lr["id"], "position": [x, y], "orientation": "vertical"})

    door_width = 0.75
    wall_gap = 0.3  # facing walls closer than this count as shared
    inset = 0.3     # preferred distance of the door from the overlap start

    def shared_wall(low_a, high_a, low_b, high_b):
        # Coordinate of the wall between two rooms along one axis, or None.
        if abs(high_a - low_b) < wall_gap:
            return high_a
        if abs(high_b - low_a) < wall_gap:
            return high_b
        return None

    for i, r1 in enumerate(rooms):
        m1x, m1y, M1x, M1y = boxes[i]
        for j in range(i + 1, len(rooms)):
            r2 = rooms[j]
            if r1["floor"] != r2["floor"]:
                continue
            m2x, m2y, M2x, M2y = boxes[j]
            overlap_x = min(M1x, M2x) - max(m1x, m2x)
            overlap_y = min(M1y, M2y) - max(m1y, m2y)

            wall_y = shared_wall(m1y, M1y, m2y, M2y) if overlap_x >= door_width else None
            if wall_y is not None:
                # Keep the whole door inside the overlapping wall segment.
                x = round(max(m1x, m2x) + min(inset, overlap_x - door_width), 2)
                doors.append({"id": f"door_{r1['id']}_{r2['id']}", "type": "internal", "width": 0.75, "from": r1["id"], "to": r2["id"], "position": [x, round(wall_y, 2)], "orientation": "horizontal"})
                continue

            wall_x = shared_wall(m1x, M1x, m2x, M2x) if overlap_y >= door_width else None
            if wall_x is not None:
                y = round(max(m1y, m2y) + min(inset, overlap_y - door_width), 2)
                doors.append({"id": f"door_{r1['id']}_{r2['id']}", "type": "internal", "width": 0.75, "from": r1["id"], "to": r2["id"], "position": [round(wall_x, 2), y], "orientation": "vertical"})

    # Overall extent of each floor, used to decide which wall faces outward.
    extents = {}
    for room, (x0, y0, x1, y1) in zip(rooms, boxes):
        floor = room["floor"]
        if floor in extents:
            ex0, ey0, ex1, ey1 = extents[floor]
            extents[floor] = (min(ex0, x0), min(ey0, y0), max(ex1, x1), max(ey1, y1))
        else:
            extents[floor] = (x0, y0, x1, y1)

    windowed = ("living", "bedroom", "master_bedroom", "dining", "kitchen", "study")
    for room, (mx, my, Mx, My) in zip(rooms, boxes):
        if room["type"] not in windowed:
            continue
        ex0, ey0, ex1, ey1 = extents[room["floor"]]
        if (Mx - mx) >= (My - my):
            y = round((my + My) / 2, 2)
            x = mx if (mx + Mx) / 2 < (ex0 + ex1) / 2 else Mx
            windows.append({"id": f"win_{room['id']}", "room": room["id"], "width": 1.2, "height": 1.5, "position": [round(x, 2), y], "orientation": "vertical"})
        else:
            x = round((mx + Mx) / 2, 2)
            y = my if (my + My) / 2 < (ey0 + ey1) / 2 else My
            windows.append({"id": f"win_{room['id']}", "room": room["id"], "width": 1.5, "height": 1.2, "position": [x, round(y, 2)], "orientation": "horizontal"})
    return doors, windows
297
+
298
def generate_example(seed=None):
    """Generate one synthetic (prompt, floor-plan) training example.

    Random project parameters are sampled, turned into a plot boundary, a
    buildable envelope, placed rooms and openings, and serialised.

    Args:
        seed: When given, the example is drawn from a private
            ``random.Random(seed)`` so it is reproducible and does not reseed
            the process-wide generator. When ``None``, the module-level
            ``random`` functions are used.

    Returns:
        A dict with ``prompt`` (natural-language request), ``completion``
        (the floor plan as indented JSON) and ``params`` (the sampled inputs).
    """
    rng = random.Random(seed) if seed is not None else random

    # NOTE: the order of the draws below determines what each seed produces.
    plot_length = round(rng.uniform(8.0, 25.0), 1)
    plot_width = round(rng.uniform(7.0, 20.0), 1)
    setback_front = round(rng.uniform(1.0, 3.0), 1)
    setback_rear = round(rng.uniform(0.5, 2.0), 1)
    setback_left = round(rng.uniform(0.5, 2.0), 1)
    setback_right = round(rng.uniform(0.5, 2.0), 1)
    road_side = rng.choice(DIRECTIONS)
    north_direction = rng.choice(DIRECTIONS)
    num_bedrooms = rng.randint(1, 4)
    toilets = rng.randint(1, num_bedrooms + 1)
    parking = rng.choice([True, False])
    city = rng.choice(CITIES)
    vastu_enabled = rng.choice([True, False])
    road_width_m = round(rng.uniform(6.0, 18.0), 1)
    has_pooja = rng.choice([True, False])
    has_study = rng.choice([True, False])
    has_balcony = rng.choice([True, False])
    plot_shape = rng.choice(["rectangular"] * 8 + ["l_shaped"] * 1 + ["trapezoid"] * 1)
    plot_front_width = plot_length if plot_shape != "trapezoid" else round(plot_length * rng.uniform(0.8, 1.0), 1)
    plot_rear_width = plot_length if plot_shape != "trapezoid" else round(plot_length * rng.uniform(0.7, 1.0), 1)
    plot_side_offset = 0.0 if plot_shape != "trapezoid" else round(rng.uniform(-1.0, 1.0), 1)
    cutout_corner = rng.choice(["NE", "NW", "SE", "SW"])
    cutout_width = round(plot_length * rng.uniform(0.15, 0.35), 1) if plot_shape == "l_shaped" else 0.0
    cutout_height = round(plot_width * rng.uniform(0.15, 0.35), 1) if plot_shape == "l_shaped" else 0.0
    num_floors = rng.choices([1, 2, 3], weights=[5, 3, 1])[0]
    has_stilt = rng.choice([True, False]) if num_floors > 1 else False
    has_basement = rng.choice([True, False])
    municipality = rng.choice(MUNICIPALITIES)

    min_buildable = 5.0

    def setback_cap(extent):
        # Largest per-side setback (in 0.1 steps) that still leaves
        # min_buildable. Rounded down so the round(..., 1) applied when
        # building params cannot push the pair back over the limit.
        return max(0.0, math.floor((extent - min_buildable) / 2 * 10 + 1e-9) / 10)

    # Front/rear setbacks are taken off plot_width (the y axis) and left/right
    # off plot_length (the x axis) by make_plot_boundary and
    # make_buildable_boundary, so each pair is checked against that extent.
    if plot_width - setback_front - setback_rear < min_buildable:
        cap = setback_cap(plot_width)
        setback_front = min(setback_front, cap)
        setback_rear = min(setback_rear, cap)
    if plot_length - setback_left - setback_right < min_buildable:
        cap = setback_cap(plot_length)
        setback_left = min(setback_left, cap)
        setback_right = min(setback_right, cap)

    params = {
        "name": f"Project_{rng.randint(1000, 9999)}",
        "plot_length": plot_length, "plot_width": plot_width,
        "setback_front": round(setback_front, 1), "setback_rear": round(setback_rear, 1),
        "setback_left": round(setback_left, 1), "setback_right": round(setback_right, 1),
        "road_side": road_side, "north_direction": north_direction,
        "num_bedrooms": num_bedrooms, "toilets": toilets, "parking": parking,
        "city": city, "vastu_enabled": vastu_enabled, "road_width_m": road_width_m,
        "has_pooja": has_pooja, "has_study": has_study, "has_balcony": has_balcony,
        "plot_shape": plot_shape,
        "plot_front_width": plot_front_width if plot_shape == "trapezoid" else None,
        "plot_rear_width": plot_rear_width if plot_shape == "trapezoid" else None,
        "plot_side_offset": plot_side_offset if plot_shape == "trapezoid" else None,
        "plot_corners": None,
        "cutout_corner": cutout_corner, "cutout_width": cutout_width, "cutout_height": cutout_height,
        "num_floors": num_floors, "has_stilt": has_stilt, "has_basement": has_basement,
        "municipality": municipality, "custom_room_config": None, "team_id": None,
    }

    plot_boundary = make_plot_boundary(params)
    buildable_boundary = make_buildable_boundary(plot_boundary, params)
    room_specs = generate_room_specs(params)
    rooms = place_rooms(buildable_boundary, room_specs, vastu_enabled, road_side, north_direction)
    doors, windows = generate_openings(rooms, road_side)

    def floor_area(floor):
        return round(sum(r["area_sqm"] for r in rooms if r["floor"] == floor), 2)

    # "Carpet" area here excludes stilt and basement rooms; built-up counts all.
    total_area = sum(r["area_sqm"] for r in rooms if r["floor"] not in ("stilt", "basement"))
    built_up_area = sum(r["area_sqm"] for r in rooms)

    floorplan = {
        "project_name": params["name"],
        "plot": {
            "shape": plot_shape,
            "outer_boundary": plot_boundary,
            "setbacks": {"front": params["setback_front"], "rear": params["setback_rear"],
                         "left": params["setback_left"], "right": params["setback_right"]},
            "buildable_boundary": buildable_boundary,
            "road_side": road_side, "north_direction": north_direction,
            "plot_length": plot_length, "plot_width": plot_width,
        },
        "rooms": rooms,
        "doors": doors,
        "windows": windows,
        "dimensions": {
            "total_built_up_area_sqm": round(built_up_area, 2),
            "total_carpet_area_sqm": round(total_area, 2),
            "ground_floor_area_sqm": floor_area("gf"),
            "first_floor_area_sqm": floor_area("ff"),
            "second_floor_area_sqm": floor_area("sf"),
            "stilt_area_sqm": floor_area("stilt"),
            "basement_area_sqm": floor_area("basement"),
        },
        "meta": {"num_floors": num_floors, "has_stilt": has_stilt, "has_basement": has_basement,
                 "vastu_enabled": vastu_enabled, "city": city, "municipality": municipality},
    }

    lines = [
        f"Generate a floor plan for project '{params['name']}'.",
        f"Plot dimensions: {plot_length}m x {plot_width}m, shape: {plot_shape}.",
        f"Setbacks: front={params['setback_front']}m, rear={params['setback_rear']}m, left={params['setback_left']}m, right={params['setback_right']}m.",
        f"Road side: {road_side}, North direction: {north_direction}.",
        f"Requirements: {num_bedrooms} bedrooms, {toilets} toilets.",
    ]
    if parking:
        lines.append("Parking is required.")
    if has_pooja:
        lines.append("Include a Pooja room.")
    if has_study:
        lines.append("Include a Study room.")
    if has_balcony:
        lines.append("Include a Balcony.")
    if has_stilt:
        lines.append("Stilt parking required.")
    if has_basement:
        lines.append("Include a basement.")
    lines.append(f"Number of floors: {num_floors} (1=G, 2=G+1, 3=G+2).")
    if vastu_enabled:
        lines.append("Vastu compliance is enabled.")
    lines.append(f"City: {city}, Municipality: {municipality or 'N/A'}.")
    prompt = "\n".join(lines)

    return {"prompt": prompt, "completion": json.dumps(floorplan, indent=2), "params": params}
411
+
412
def create_dataset(num_train=5000, num_val=500, num_test=500):
    """Generate train / validation / test splits of synthetic examples.

    Every example is produced by ``generate_example`` with its own seed. Train
    seeds start at 0, validation seeds at 100000 and test seeds at 200000. For
    splits large enough to run into the next base, that base is pushed past
    the previous split, so no seed (and therefore no example) is shared
    between splits.

    Args:
        num_train: Number of training examples.
        num_val: Number of validation examples.
        num_test: Number of test examples.

    Returns:
        A ``DatasetDict`` with "train", "validation" and "test" splits.
    """
    print(f"Generating {num_train} train, {num_val} val, {num_test} test examples...")

    # Defaults keep the historical bases; larger splits shift them forward.
    val_base = max(100000, num_train)
    test_base = max(200000, val_base + num_val)

    train = [generate_example(seed=i) for i in range(num_train)]
    val = [generate_example(seed=val_base + i) for i in range(num_val)]
    test = [generate_example(seed=test_base + i) for i in range(num_test)]

    return DatasetDict({
        "train": Dataset.from_list(train),
        "validation": Dataset.from_list(val),
        "test": Dataset.from_list(test),
    })
423
+
424
if __name__ == "__main__":
    # Build the default-sized splits, persist them, then print a short preview.
    dataset = create_dataset(num_train=5000, num_val=500, num_test=500)
    dataset.save_to_disk("/app/floorplan_synthetic_dataset")
    print("Saved to /app/floorplan_synthetic_dataset")

    train_count = len(dataset["train"])
    val_count = len(dataset["validation"])
    test_count = len(dataset["test"])
    print(f"Train: {train_count}, Val: {val_count}, Test: {test_count}")

    first_example = dataset["train"][0]
    print("\n--- Sample prompt ---")
    print(first_example["prompt"][:500])
    print("\n--- Sample completion (truncated) ---")
    print(first_example["completion"][:500])