"""
Synthetic floorplan dataset generator for Indian residential construction.
Produces (parametric_input, floorplan_output) pairs matching the user's ProjectCreate schema.
"""
| import json, random, math |
| from dataclasses import dataclass |
| from typing import List, Dict, Any, Optional, Tuple |
| from datasets import Dataset, DatasetDict |
|
|
# Per-room-type size envelope: width/depth bounds plus an (min, max) area
# range. Units are presumably metres / square metres -- TODO confirm.
# None of the functions in this section read this table.
ROOM_SIZES = {
    "bedroom": {
        "min_w": 2.7, "max_w": 4.2, "min_d": 3.0, "max_d": 4.8, "area_range": (10, 20),
    },
    "master_bedroom": {
        "min_w": 3.3, "max_w": 4.5, "min_d": 3.6, "max_d": 5.4, "area_range": (14, 25),
    },
    "toilet": {
        "min_w": 1.2, "max_w": 1.8, "min_d": 1.8, "max_d": 2.7, "area_range": (2.5, 5),
    },
    "kitchen": {
        "min_w": 2.4, "max_w": 3.6, "min_d": 2.7, "max_d": 4.2, "area_range": (7, 15),
    },
    "living": {
        "min_w": 3.3, "max_w": 5.4, "min_d": 3.6, "max_d": 6.0, "area_range": (14, 30),
    },
    "dining": {
        "min_w": 2.7, "max_w": 3.6, "min_d": 3.0, "max_d": 4.5, "area_range": (9, 16),
    },
    "pooja": {
        "min_w": 1.2, "max_w": 1.8, "min_d": 1.5, "max_d": 2.1, "area_range": (1.8, 4),
    },
    "study": {
        "min_w": 2.1, "max_w": 3.0, "min_d": 2.4, "max_d": 3.6, "area_range": (5, 11),
    },
    "balcony": {
        "min_w": 1.2, "max_w": 2.4, "min_d": 2.4, "max_d": 4.5, "area_range": (3, 10),
    },
    "parking": {
        "min_w": 2.5, "max_w": 3.3, "min_d": 5.0, "max_d": 6.0, "area_range": (12, 20),
    },
    "staircase": {
        "min_w": 2.7, "max_w": 3.3, "min_d": 3.0, "max_d": 4.5, "area_range": (8, 15),
    },
    "corridor": {
        "min_w": 1.0, "max_w": 1.5, "min_d": 2.4, "max_d": 6.0, "area_range": (2.5, 9),
    },
    "utility": {
        "min_w": 1.5, "max_w": 2.4, "min_d": 1.8, "max_d": 3.0, "area_range": (3, 7),
    },
    "store": {
        "min_w": 1.2, "max_w": 2.1, "min_d": 1.5, "max_d": 2.4, "area_range": (2, 5),
    },
}
|
|
# Value pools for the categorical project parameters.
PLOT_SHAPES = [
    "rectangular",
    "l_shaped",
    "trapezoid",
]
DIRECTIONS = [
    "N",
    "S",
    "E",
    "W",
]
CITIES = [
    "Delhi",
    "Mumbai",
    "Bangalore",
    "Chennai",
    "Hyderabad",
    "Pune",
    "Kolkata",
    "Ahmedabad",
    "Jaipur",
    "other",
]
# The trailing None stands for "no municipality given".
MUNICIPALITIES = [
    "MC",
    "MDA",
    "PMA",
    "BDA",
    "GHMC",
    "BBMP",
    "MCD",
    "KMC",
    "JDA",
    None,
]
|
|
def rect_polygon(x, y, w, d):
    """Return the four corners of an axis-aligned rectangle.

    The corners are listed starting at ``(x, y)``, then ``(x + w, y)``,
    ``(x + w, y + d)`` and ``(x, y + d)``. Every coordinate is rounded to
    two decimals.
    """
    far_x = x + w
    far_y = y + d
    corners = ((x, y), (far_x, y), (far_x, far_y), (x, far_y))
    return [[round(cx, 2), round(cy, 2)] for cx, cy in corners]
|
|
def polygon_area(poly):
    """Return the unsigned area of ``poly`` using the shoelace formula.

    ``poly`` is a sequence of ``[x, y]`` points; the last point is joined
    back to the first. An empty sequence yields ``0.0``.
    """
    count = len(poly)
    twice_signed = 0.0
    for idx, (ax, ay) in enumerate(poly):
        # Wrap around so the final vertex pairs with the first one.
        bx, by = poly[(idx + 1) % count]
        twice_signed += ax * by - bx * ay
    return abs(twice_signed) / 2.0
|
|
def polygon_bbox(poly):
    """Return ``(min_x, min_y, max_x, max_y)`` over the points of ``poly``."""
    x_values = []
    y_values = []
    for point in poly:
        x_values.append(point[0])
        y_values.append(point[1])
    return min(x_values), min(y_values), max(x_values), max(y_values)
|
|
def make_plot_boundary(params):
    """Return the plot outline as a list of ``[x, y]`` points.

    ``plot_length`` runs along x and ``plot_width`` along y, with the front
    edge on ``y = 0``. Supported ``plot_shape`` values:

    * ``"rectangular"`` (default): a plain rectangle.
    * ``"l_shaped"``: a rectangle with a ``cutout_width`` x ``cutout_height``
      notch removed from ``cutout_corner`` ("NE", "NW", "SE" or "SW"). An
      unrecognised corner falls back to the rectangle.
    * ``"trapezoid"``: front edge of ``plot_front_width`` on ``y = 0`` and a
      rear edge of ``plot_rear_width`` on ``y = plot_width``, whose left end
      sits at ``x = -plot_side_offset``.

    Any other shape yields the rectangle. Optional keys that are missing or
    explicitly ``None`` use their defaults. Coordinates are rounded to two
    decimals for every shape.

    Raises:
        KeyError: if ``plot_length`` or ``plot_width`` is missing.
    """
    def _value(key, fallback):
        # Schema-style inputs may carry explicit None for unset optionals.
        found = params.get(key)
        return fallback if found is None else found

    def _rounded(points):
        return [[round(px, 2), round(py, 2)] for px, py in points]

    shape = _value("plot_shape", "rectangular")
    L, W = params["plot_length"], params["plot_width"]

    if shape == "l_shaped":
        cw = _value("cutout_width", L * 0.3)
        ch = _value("cutout_height", W * 0.3)
        corner = _value("cutout_corner", "NE")
        outlines = {
            "NE": [[0, 0], [L, 0], [L, W - ch], [L - cw, W - ch], [L - cw, W], [0, W]],
            "NW": [[0, 0], [L, 0], [L, W], [cw, W], [cw, W - ch], [0, W - ch]],
            "SE": [[0, 0], [L - cw, 0], [L - cw, ch], [L, ch], [L, W], [0, W]],
            "SW": [[cw, 0], [L, 0], [L, W], [0, W], [0, ch], [cw, ch]],
        }
        if corner in outlines:
            return _rounded(outlines[corner])
    elif shape == "trapezoid":
        fw = _value("plot_front_width", L)
        rw = _value("plot_rear_width", L * 0.8)
        off = _value("plot_side_offset", 0.0)
        # The rear edge keeps its original left anchor (-off) but now spans
        # the rear width; it used to reuse the front width, which ignored
        # plot_rear_width entirely. "0.0 - off" avoids emitting -0.0.
        rear_left = 0.0 - off
        return _rounded([[0, 0], [fw, 0], [rear_left + rw, W], [rear_left, W]])

    return rect_polygon(0, 0, L, W)
|
|
def make_buildable_boundary(plot_poly, params):
    """Return the buildable rectangle: the plot's bounding box inset by setbacks.

    The left/right setbacks shrink the x extent and the front/rear setbacks
    shrink the y extent, with the front measured from the minimum y. Missing
    or ``None`` setbacks default to 1.5 (front) and 1.0 (rear, left, right).

    The result is always an axis-aligned rectangle derived from the bounding
    box, whatever the plot shape. The original code tested whether the plot
    was a rectangle but returned the same expression on both paths, so that
    test has been removed.

    NOTE(review): for L-shaped or trapezoidal plots the returned rectangle
    can extend outside the actual plot outline; setbacks larger than the
    plot produce negative dimensions. Neither case is guarded here.
    """
    def _setback(key, default):
        value = params.get(key)
        return default if value is None else value

    sf = _setback("setback_front", 1.5)
    sr = _setback("setback_rear", 1.0)
    sl = _setback("setback_left", 1.0)
    srt = _setback("setback_right", 1.0)

    minx, miny, maxx, maxy = polygon_bbox(plot_poly)
    return rect_polygon(
        minx + sl,
        miny + sf,
        maxx - minx - sl - srt,
        maxy - miny - sf - sr,
    )
|
|
def distribute_width(total, n, min_size=2.4):
    """Split ``total`` into ``n`` bay sizes that sum (up to rounding) to ``total``.

    Each bay except the last targets ``total / n`` rounded to one decimal,
    raised to at least ``min_size`` and capped so that every later bay can
    still receive ``min_size``. The last bay takes whatever remains.

    Args:
        total: Length to divide.
        n: Number of bays.
        min_size: Smallest acceptable bay (defaults to the previously
            hard-coded 2.4).

    Returns:
        A list of ``n`` sizes rounded to two decimals. ``n <= 0`` yields an
        empty list. When ``total`` cannot give every bay ``min_size`` the
        length is split evenly instead, so no bay is ever negative.
    """
    if n <= 0:
        return []
    if total < min_size * n:
        # The minimum cannot be honoured; an even split keeps sizes sane.
        share = round(total / n, 2)
        return [share] * (n - 1) + [round(total - share * (n - 1), 2)]

    target = max(min_size, round(total / n, 1))
    bays = []
    remaining = total
    # "later" counts the bays still to come after the current one.
    for later in range(n - 1, 0, -1):
        bay = round(min(target, remaining - min_size * later), 2)
        bays.append(bay)
        remaining -= bay
    bays.append(round(remaining, 2))
    return bays
|
|
def distribute_depth(total, n):
    """Split a depth of ``total`` into ``n`` row depths.

    The rule is the same one ``distribute_width`` applies to bay widths, so
    this simply delegates to it instead of keeping a second copy of the loop.
    """
    return distribute_width(total, n)
|
|
def generate_room_specs(params):
    """Build the list of room specifications requested by ``params``.

    Each spec is a dict with ``id``, ``type``, ``name``, ``zone``, ``floor``
    and ``target_area``. The list contains, in order:

    * the ground-floor core (living, kitchen, dining, common toilet);
    * exactly ``num_bedrooms`` bedrooms (the first is the master) and
      ``toilets`` toilets; on multi-floor plans the last ``num_floors - 1``
      of each go to the first floor ("ff");
    * optional pooja, study (one, on "ff" when there is an upper floor) and
      ground-floor balcony, then the ground-floor staircase;
    * parking and staircase on the "stilt" floor when ``has_stilt`` or
      ``parking`` is set; storage and staircase for ``has_basement``;
    * a lounge, common toilet and optional balcony for each upper floor
      ("ff" for two or more floors, "sf" for three or more);
    * one entry per ``custom_room_config`` item.

    Bedrooms and the study are emitted once only. Earlier the upper-floor
    sections added further bedrooms (reusing names such as "Bedroom 2") and
    a second study, so the plan held more rooms than were requested.

    Raises:
        KeyError: if ``num_bedrooms`` or ``toilets`` is missing.
    """
    specs = []
    num_bed = params["num_bedrooms"]
    num_toi = params["toilets"]
    has_pooja = params.get("has_pooja", False)
    has_study = params.get("has_study", False)
    has_balc = params.get("has_balcony", False)
    parking = params.get("parking", False)
    num_floors = params.get("num_floors") or 1
    has_stilt = params.get("has_stilt", False)
    has_basement = params.get("has_basement", False)
    custom = params.get("custom_room_config") or []

    def add(rtype, rid, name, zone, floor="gf", target_area=None):
        specs.append({
            "id": rid, "type": rtype, "name": name, "zone": zone,
            "floor": floor, "target_area": target_area,
        })

    def floor_for(index, count):
        # The last (num_floors - 1) rooms of a group move upstairs.
        on_upper = num_floors > 1 and index >= count - (num_floors - 1)
        return "ff" if on_upper else "gf"

    add("living", "living_1", "Living Room", "front", "gf", 18)
    add("kitchen", "kitchen_1", "Kitchen", "mid", "gf", 9)
    add("dining", "dining_1", "Dining Area", "front", "gf", 12)
    add("toilet", "toilet_common", "Common Toilet", "back", "gf", 3.5)

    for i in range(num_bed):
        floor = floor_for(i, num_bed)
        if i == 0:
            add("master_bedroom", f"bedroom_{i+1}", "Master Bedroom", "back", floor, 16)
        else:
            add("bedroom", f"bedroom_{i+1}", f"Bedroom {i+1}", "back", floor, 12)

    for i in range(num_toi):
        add("toilet", f"toilet_{i+1}", f"Toilet {i+1}", "back", floor_for(i, num_toi), 3.5)

    if has_pooja:
        add("pooja", "pooja_1", "Pooja Room", "back", "gf", 2.5)
    if has_study:
        add("study", "study_1", "Study Room", "back", "ff" if num_floors > 1 else "gf", 8)
    if has_balc:
        add("balcony", "balcony_1", "Balcony", "side", "gf", 5)
    add("staircase", "stairs_1", "Staircase", "mid", "gf", 10)

    if has_stilt or parking:
        add("parking", "parking_1", "Parking", "side", "stilt", 15)
        add("staircase", "stairs_stilt", "Staircase (Stilt)", "mid", "stilt", 10)

    if has_basement:
        add("store", "store_base", "Storage", "back", "basement", 8)
        add("staircase", "stairs_base", "Staircase (Basement)", "mid", "basement", 10)

    if num_floors >= 2:
        add("living", "living_ff", "Family Lounge", "front", "ff", 14)
        add("toilet", "toilet_ff", "Common Toilet (FF)", "back", "ff", 3.5)
        if has_balc:
            add("balcony", "balcony_ff", "Balcony (FF)", "side", "ff", 5)

    if num_floors >= 3:
        add("living", "living_sf", "Terrace Lounge", "front", "sf", 12)
        add("toilet", "toilet_sf", "Common Toilet (SF)", "back", "sf", 3.5)
        if has_balc:
            add("balcony", "balcony_sf", "Balcony (SF)", "side", "sf", 5)

    floor_map = {"basement": "basement", "stilt": "stilt", "gf": "gf",
                 "ff": "ff", "sf": "sf", "either": "gf"}
    for i, cr in enumerate(custom, start=1):
        rtype = (cr.get("type") or "room").lower().replace(" ", "_")
        floor = floor_map.get(cr.get("floor_preference", "either"), "gf")
        min_a = cr.get("min_area_sqm")
        if min_a is None:
            min_a = 10
        add(rtype, f"custom_{i}", cr.get("name", f"Custom Room {i}"), "mid", floor, min_a)

    return specs
|
|
def place_rooms(buildable_poly, rooms_spec, vastu, road_side, north_dir):
    """Lay the requested rooms out inside the buildable rectangle.

    The bounding box of ``buildable_poly`` is cut into 2-4 bays along x and
    up to three rows along y (front 35 %, mid 30 %, back 35 % of the depth,
    stacked from the minimum y). Rooms are assigned to a row by type and
    fill the bays left to right; each room's depth is derived from its
    ``target_area`` (at least 2.4, at most the row depth). Balconies are then
    attached to the first or last row-placed room depending on
    ``road_side``, and parking is placed at the box origin.

    Returns a list of room dicts with ``id``, ``type``, ``name``, ``floor``,
    ``polygon``, ``area_sqm``, ``dimensions`` (width along x, depth along y)
    and ``position`` (the polygon centre).

    Fixes relative to the earlier version:
      * a ``target_area`` of ``None`` falls back to the full bay area
        instead of raising ``TypeError``;
      * balcony ``dimensions`` and ``position`` describe the balcony itself,
        not the room it is attached to;
      * balconies always attach to a room placed in the rows, never to a
        previously appended balcony or parking slab.

    NOTE(review): known limitations left untouched --
      * rooms of every floor share the same rows;
      * a row holds at most ``len(bay_widths)`` rooms, the rest are dropped;
      * types not listed in any row (e.g. custom types) are never placed;
      * ``vastu`` and ``north_dir`` are accepted but not used.
    """
    minx, miny, maxx, maxy = polygon_bbox(buildable_poly)
    bw, bd = maxx - minx, maxy - miny
    num_bays = max(2, min(4, int(bw / 3.0)))
    bay_widths = distribute_width(bw, num_bays)

    def place_row(room_list, row_depth, y_start):
        x_cursor = minx
        row = []
        # zip stops at the shorter sequence: one room per bay at most.
        for room, w in zip(room_list, bay_widths):
            target = room.get("target_area")
            if target is None:
                target = w * row_depth
            d = row_depth
            if target > 0 and w > 0:
                d = round(min(max(target / w, 2.4), row_depth), 2)
            poly = rect_polygon(round(x_cursor, 2), round(y_start, 2), round(w, 2), round(d, 2))
            row.append({
                "id": room["id"], "type": room["type"], "name": room["name"],
                "floor": room.get("floor", "gf"),
                "polygon": poly, "area_sqm": round(polygon_area(poly), 2),
                "dimensions": {"width": round(w, 2), "depth": round(d, 2)},
                "position": {"x": round(x_cursor + w / 2, 2), "y": round(y_start + d / 2, 2)},
            })
            x_cursor += w
        return row

    row_plan = (
        (("living", "dining"), 0.35),
        (("kitchen", "utility", "staircase", "corridor", "store"), 0.3),
        (("bedroom", "master_bedroom", "toilet", "pooja", "study"), 0.35),
    )
    placed = []
    y_cursor = miny
    for row_types, share in row_plan:
        members = [r for r in rooms_spec if r["type"] in row_types]
        if not members:
            continue
        row_depth = bd * share
        placed.extend(place_row(members, row_depth, y_cursor))
        y_cursor += row_depth

    # Snapshot so balcony anchoring is not affected by side rooms added below.
    row_rooms = list(placed)
    park_w = min(bw, 3.0 * 2.5)
    park_d = min(bd * 0.25, 6.0)

    for room in rooms_spec:
        if room["type"] == "balcony":
            if not row_rooms:
                continue
            ref = row_rooms[0] if road_side in ("N", "W") else row_rooms[-1]
            minx_r, miny_r, maxx_r, maxy_r = polygon_bbox(ref["polygon"])
            bal_w = min(2.0, bw * 0.15)
            if road_side == "N":
                poly = rect_polygon(minx_r, maxy_r, maxx_r - minx_r, bal_w)
            elif road_side == "S":
                poly = rect_polygon(minx_r, miny_r - bal_w, maxx_r - minx_r, bal_w)
            elif road_side == "E":
                poly = rect_polygon(maxx_r, miny_r, bal_w, maxy_r - miny_r)
            else:
                poly = rect_polygon(minx_r - bal_w, miny_r, bal_w, maxy_r - miny_r)
            bx0, by0, bx1, by1 = polygon_bbox(poly)
            placed.append({
                "id": room["id"], "type": "balcony", "name": room.get("name", "Balcony"),
                "floor": room.get("floor", "gf"),
                "polygon": poly, "area_sqm": round(polygon_area(poly), 2),
                "dimensions": {"width": round(bx1 - bx0, 2), "depth": round(by1 - by0, 2)},
                "position": {"x": round((bx0 + bx1) / 2, 2), "y": round((by0 + by1) / 2, 2)},
            })
        elif room["type"] == "parking":
            poly = rect_polygon(minx, miny, park_w, park_d)
            placed.append({
                "id": room["id"], "type": "parking", "name": room.get("name", "Parking"),
                "floor": room.get("floor", "stilt"),
                "polygon": poly, "area_sqm": round(polygon_area(poly), 2),
                "dimensions": {"width": round(park_w, 2), "depth": round(park_d, 2)},
                "position": {"x": round(minx + park_w / 2, 2), "y": round(miny + park_d / 2, 2)},
            })
    return placed
|
|
def generate_openings(rooms, road_side):
    """Derive doors and windows from the placed rooms.

    Returns ``(doors, windows)``:

    * a single main entrance into the first ground-floor living room, on
      the wall selected by ``road_side`` and centred along it;
    * one internal door for every pair of same-floor rooms whose bounding
      boxes meet along an edge (within 0.3) and share enough of that edge to
      hold the door (0.3 inset + 0.75 width). Both orderings of the pair are
      checked, and rooms that merely touch at a corner no longer get a door;
    * one window per living/bedroom/master bedroom/dining/kitchen/study
      room, centred on a short wall. The wall lying on the outer envelope of
      that floor's rooms is preferred; if neither or both qualify, the
      max-x / max-y wall is used. The old code compared two equal distances
      here, so floating-point noise decided the side.
    """
    doors, windows = [], []
    boxes = [polygon_bbox(room["polygon"]) for room in rooms]

    # Main entrance: first living room on the ground floor, if any.
    for room, (mx, my, Mx, My) in zip(rooms, boxes):
        if room["type"] != "living" or room["floor"] != "gf":
            continue
        if road_side in ("N", "S"):
            position = [round((mx + Mx) / 2 - 0.45, 2), My if road_side == "N" else my]
            orientation = "horizontal"
        else:
            position = [Mx if road_side == "E" else mx, round((my + My) / 2 - 0.45, 2)]
            orientation = "vertical"
        doors.append({"id": "door_main", "type": "main_entrance", "width": 0.9,
                      "from": "outside", "to": room["id"],
                      "position": position, "orientation": orientation})
        break

    door_width = 0.75
    inset = 0.3
    tolerance = 0.3
    min_shared = inset + door_width

    def _meeting_edge(max_a, min_b, max_b, min_a):
        # Coordinate of the wall where the two boxes meet, or None.
        if abs(max_a - min_b) < tolerance:
            return max_a
        if abs(max_b - min_a) < tolerance:
            return max_b
        return None

    for i, r1 in enumerate(rooms):
        m1x, m1y, M1x, M1y = boxes[i]
        for j in range(i + 1, len(rooms)):
            r2 = rooms[j]
            if r1["floor"] != r2["floor"]:
                continue
            m2x, m2y, M2x, M2y = boxes[j]
            start_x, start_y = max(m1x, m2x), max(m1y, m2y)
            shared_x = min(M1x, M2x) - start_x
            shared_y = min(M1y, M2y) - start_y

            position = orientation = None
            if shared_x >= min_shared:
                edge = _meeting_edge(M1y, m2y, M2y, m1y)
                if edge is not None:
                    position = [round(start_x + inset, 2), round(edge, 2)]
                    orientation = "horizontal"
            if position is None and shared_y >= min_shared:
                edge = _meeting_edge(M1x, m2x, M2x, m1x)
                if edge is not None:
                    position = [round(edge, 2), round(start_y + inset, 2)]
                    orientation = "vertical"
            if position is None:
                continue
            doors.append({"id": f"door_{r1['id']}_{r2['id']}", "type": "internal",
                          "width": door_width, "from": r1["id"], "to": r2["id"],
                          "position": position, "orientation": orientation})

    # Outer envelope of each floor, used to find exterior walls.
    envelopes = {}
    for room, (mx, my, Mx, My) in zip(rooms, boxes):
        env = envelopes.get(room["floor"])
        if env is None:
            envelopes[room["floor"]] = (mx, my, Mx, My)
        else:
            envelopes[room["floor"]] = (min(env[0], mx), min(env[1], my),
                                        max(env[2], Mx), max(env[3], My))

    eps = 0.01
    windowed = ("living", "bedroom", "master_bedroom", "dining", "kitchen", "study")
    for room, (mx, my, Mx, My) in zip(rooms, boxes):
        if room["type"] not in windowed:
            continue
        ex0, ey0, ex1, ey1 = envelopes[room["floor"]]
        if (Mx - mx) >= (My - my):
            only_min_exterior = abs(mx - ex0) < eps and abs(Mx - ex1) >= eps
            x = mx if only_min_exterior else Mx
            windows.append({"id": f"win_{room['id']}", "room": room["id"],
                            "width": 1.2, "height": 1.5,
                            "position": [round(x, 2), round((my + My) / 2, 2)],
                            "orientation": "vertical"})
        else:
            only_min_exterior = abs(my - ey0) < eps and abs(My - ey1) >= eps
            y = my if only_min_exterior else My
            windows.append({"id": f"win_{room['id']}", "room": room["id"],
                            "width": 1.5, "height": 1.2,
                            "position": [round((mx + Mx) / 2, 2), round(y, 2)],
                            "orientation": "horizontal"})
    return doors, windows
|
|
def generate_example(seed=None):
    """Generate one synthetic (prompt, floor plan) training example.

    Random project parameters are drawn, turned into a plot outline,
    buildable rectangle, room list, room layout and openings, and packaged
    as a dict with:

    * ``"prompt"``: a plain-text description of the requirements;
    * ``"completion"``: the floor plan serialised as indented JSON;
    * ``"params"``: the raw parameter dict.

    Args:
        seed: When given, a private ``random.Random(seed)`` is used, so the
            result is reproducible and the module-level RNG is left alone
            (it used to be reseeded as a side effect). When ``None`` the
            module-level ``random`` functions are used.

    The minimum-buildable clamp follows the geometry used by the boundary
    helpers: front/rear setbacks consume ``plot_width`` (y) and left/right
    setbacks consume ``plot_length`` (x). It previously tested the opposite
    axes, so the clamp did not protect the dimension it was meant to.
    """
    rng = random if seed is None else random.Random(seed)

    # NOTE: the order of the draws below is part of the seeded output.
    plot_length = round(rng.uniform(8.0, 25.0), 1)
    plot_width = round(rng.uniform(7.0, 20.0), 1)
    setback_front = round(rng.uniform(1.0, 3.0), 1)
    setback_rear = round(rng.uniform(0.5, 2.0), 1)
    setback_left = round(rng.uniform(0.5, 2.0), 1)
    setback_right = round(rng.uniform(0.5, 2.0), 1)
    road_side = rng.choice(DIRECTIONS)
    north_direction = rng.choice(DIRECTIONS)
    num_bedrooms = rng.randint(1, 4)
    toilets = rng.randint(1, num_bedrooms + 1)
    parking = rng.choice([True, False])
    city = rng.choice(CITIES)
    vastu_enabled = rng.choice([True, False])
    road_width_m = round(rng.uniform(6.0, 18.0), 1)
    has_pooja = rng.choice([True, False])
    has_study = rng.choice([True, False])
    has_balcony = rng.choice([True, False])
    plot_shape = rng.choice(["rectangular"] * 8 + ["l_shaped"] * 1 + ["trapezoid"] * 1)
    is_trapezoid = plot_shape == "trapezoid"
    is_l_shaped = plot_shape == "l_shaped"
    plot_front_width = round(plot_length * rng.uniform(0.8, 1.0), 1) if is_trapezoid else plot_length
    plot_rear_width = round(plot_length * rng.uniform(0.7, 1.0), 1) if is_trapezoid else plot_length
    plot_side_offset = round(rng.uniform(-1.0, 1.0), 1) if is_trapezoid else 0.0
    cutout_corner = rng.choice(["NE", "NW", "SE", "SW"])
    cutout_width = round(plot_length * rng.uniform(0.15, 0.35), 1) if is_l_shaped else 0.0
    cutout_height = round(plot_width * rng.uniform(0.15, 0.35), 1) if is_l_shaped else 0.0
    num_floors = rng.choices([1, 2, 3], weights=[5, 3, 1])[0]
    has_stilt = rng.choice([True, False]) if num_floors > 1 else False
    has_basement = rng.choice([True, False])
    municipality = rng.choice(MUNICIPALITIES)

    # Keep at least min_buildable along each axis after the setbacks.
    min_buildable = 5.0
    if plot_width - setback_front - setback_rear < min_buildable:
        cap = (plot_width - min_buildable) / 2
        setback_front = min(setback_front, cap)
        setback_rear = min(setback_rear, cap)
    if plot_length - setback_left - setback_right < min_buildable:
        cap = (plot_length - min_buildable) / 2
        setback_left = min(setback_left, cap)
        setback_right = min(setback_right, cap)

    params = {
        "name": f"Project_{rng.randint(1000,9999)}",
        "plot_length": plot_length, "plot_width": plot_width,
        "setback_front": round(setback_front, 1), "setback_rear": round(setback_rear, 1),
        "setback_left": round(setback_left, 1), "setback_right": round(setback_right, 1),
        "road_side": road_side, "north_direction": north_direction,
        "num_bedrooms": num_bedrooms, "toilets": toilets, "parking": parking,
        "city": city, "vastu_enabled": vastu_enabled, "road_width_m": road_width_m,
        "has_pooja": has_pooja, "has_study": has_study, "has_balcony": has_balcony,
        "plot_shape": plot_shape,
        "plot_front_width": plot_front_width if is_trapezoid else None,
        "plot_rear_width": plot_rear_width if is_trapezoid else None,
        "plot_side_offset": plot_side_offset if is_trapezoid else None,
        "plot_corners": None,
        "cutout_corner": cutout_corner, "cutout_width": cutout_width, "cutout_height": cutout_height,
        "num_floors": num_floors, "has_stilt": has_stilt, "has_basement": has_basement,
        "municipality": municipality, "custom_room_config": None, "team_id": None,
    }

    plot_boundary = make_plot_boundary(params)
    buildable_boundary = make_buildable_boundary(plot_boundary, params)
    room_specs = generate_room_specs(params)
    rooms = place_rooms(buildable_boundary, room_specs, vastu_enabled, road_side, north_direction)
    doors, windows = generate_openings(rooms, road_side)

    def floor_area(floor):
        return round(sum(r["area_sqm"] for r in rooms if r["floor"] == floor), 2)

    # "Carpet" area excludes the stilt and basement levels; built-up is everything.
    total_area = sum(r["area_sqm"] for r in rooms if r["floor"] not in ("stilt", "basement"))
    built_up_area = sum(r["area_sqm"] for r in rooms)

    floorplan = {
        "project_name": params["name"],
        "plot": {
            "shape": plot_shape,
            "outer_boundary": plot_boundary,
            "setbacks": {"front": params["setback_front"], "rear": params["setback_rear"],
                         "left": params["setback_left"], "right": params["setback_right"]},
            "buildable_boundary": buildable_boundary,
            "road_side": road_side, "north_direction": north_direction,
            "plot_length": plot_length, "plot_width": plot_width,
        },
        "rooms": rooms,
        "doors": doors,
        "windows": windows,
        "dimensions": {
            "total_built_up_area_sqm": round(built_up_area, 2),
            "total_carpet_area_sqm": round(total_area, 2),
            "ground_floor_area_sqm": floor_area("gf"),
            "first_floor_area_sqm": floor_area("ff"),
            "second_floor_area_sqm": floor_area("sf"),
            "stilt_area_sqm": floor_area("stilt"),
            "basement_area_sqm": floor_area("basement"),
        },
        "meta": {"num_floors": num_floors, "has_stilt": has_stilt, "has_basement": has_basement,
                 "vastu_enabled": vastu_enabled, "city": city, "municipality": municipality},
    }

    lines = [
        f"Generate a floor plan for project '{params['name']}'.",
        f"Plot dimensions: {plot_length}m x {plot_width}m, shape: {plot_shape}.",
        f"Setbacks: front={params['setback_front']}m, rear={params['setback_rear']}m, left={params['setback_left']}m, right={params['setback_right']}m.",
        f"Road side: {road_side}, North direction: {north_direction}.",
        f"Requirements: {num_bedrooms} bedrooms, {toilets} toilets.",
    ]
    if parking:
        lines.append("Parking is required.")
    if has_pooja:
        lines.append("Include a Pooja room.")
    if has_study:
        lines.append("Include a Study room.")
    if has_balcony:
        lines.append("Include a Balcony.")
    if has_stilt:
        lines.append("Stilt parking required.")
    if has_basement:
        lines.append("Include a basement.")
    lines.append(f"Number of floors: {num_floors} (1=G, 2=G+1, 3=G+2).")
    if vastu_enabled:
        lines.append("Vastu compliance is enabled.")
    lines.append(f"City: {city}, Municipality: {municipality or 'N/A'}.")
    prompt = "\n".join(lines)

    return {"prompt": prompt, "completion": json.dumps(floorplan, indent=2), "params": params}
|
|
def create_dataset(num_train=5000, num_val=500, num_test=500):
    """Generate the train/validation/test splits as a ``DatasetDict``.

    Every example comes from ``generate_example`` with its own seed. Train
    seeds start at 0; validation and test seeds start at 100000 and 200000
    as before, but each start is pushed up when the preceding split is large
    enough to reach it, so no seed (and therefore no example) is shared
    between splits.

    Args:
        num_train: Number of training examples.
        num_val: Number of validation examples.
        num_test: Number of test examples.
    """
    print(f"Generating {num_train} train, {num_val} val, {num_test} test examples...")
    val_start = max(100000, num_train)
    test_start = max(200000, val_start + num_val)
    splits = {
        "train": [generate_example(seed=i) for i in range(num_train)],
        "validation": [generate_example(seed=val_start + i) for i in range(num_val)],
        "test": [generate_example(seed=test_start + i) for i in range(num_test)],
    }
    return DatasetDict({name: Dataset.from_list(rows) for name, rows in splits.items()})
|
|
if __name__ == "__main__":
    # Build the default-sized dataset, persist it, then print a short summary
    # and the first training example (each field truncated to 500 characters).
    ds = create_dataset(num_train=5000, num_val=500, num_test=500)
    ds.save_to_disk("/app/floorplan_synthetic_dataset")
    print("Saved to /app/floorplan_synthetic_dataset")
    print(f"Train: {len(ds['train'])}, Val: {len(ds['validation'])}, Test: {len(ds['test'])}")

    first_example = ds["train"][0]
    print("\n--- Sample prompt ---")
    print(first_example["prompt"][:500])
    print("\n--- Sample completion (truncated) ---")
    print(first_example["completion"][:500])
|
|