Jibrann commited on
Commit
c17891d
·
verified ·
1 Parent(s): 2d55563

Upload folder using huggingface_hub

Browse files
Files changed (8) hide show
  1. README.md +48 -2
  2. client.py +5 -0
  3. grader.py +124 -0
  4. inference.py +40 -17
  5. models.py +95 -0
  6. server/app.py +13 -13
  7. server/app_environment.py +59 -6
  8. utils.py +173 -11
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: Object Placer
3
  emoji: 🔊
4
  colorFrom: purple
5
  colorTo: yellow
@@ -10,4 +10,50 @@ app_file: server/app.py
10
  pinned: false
11
  app_port: 8000
12
  base_path: /web
13
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: The Sorter Project
3
  emoji: 🔊
4
  colorFrom: purple
5
  colorTo: yellow
 
10
  pinned: false
11
  app_port: 8000
12
  base_path: /web
13
+ ---
14
+
15
+ # The Sorter Project
16
+
17
+ ## The Purpose
18
+ Building an environment to make AI models learn how to **identify**, **place** and **adjust** the position of things in the environment, which are scattered in a *random* fashion.
19
+
20
+ ## Real Life Application
21
+ We came up with this idea, keeping in mind its application in factories, warehouses and storage facilities. _(and even your coffee table!)_
22
+
23
+ ## The Problem
24
+ ### **The Industrial Perspective / Micro Perspective**
25
+
26
+ Companies spend millions if not billions on establishing, maintaining and organising warehouses and storage facilities, and in a densely populated country like India — with increasing demand for land and surging property prices — efficient storage and organisation becomes the ***need of the hour***, leading to the demand for an environment or an agent that can help companies and organisations and provide them with ways for the maximally efficient and logical storage of their "objects".
27
+ The environments and agents that specialise in full fledged identifying, sorting, stacking and organising of objects or warehouse material are few in number, and ***we are here to fill that gap***.
28
+
29
+ ### **The Populational Perspective / Macro Perspective**
30
+
31
+ With the increase in population causing a decrease in 'Open Spaces', it becomes extremely important to **build societies and localities that can cater to a huge chunk of the population**, and in such a case, The Sorter Project, though mainly built for industrial application, becomes an extremely useful tool that allows proper space utilisation to accommodate more people whilst taking minimum space. _(so in the near future we might not have to shift to mars)_
32
+
33
+ ## Our Solution
34
+ _We have developed this environment with 'ease' thanks to OpenEnv!_ <br>
35
+ Our Sorter Project consists of ***_3_ different parts*** and ***_4_ different processes/tasks***:
36
+
37
+ ### **Part 1**: The Segmentation Problem<br>
38
+ **Task 1:** Our Project has a ****Segmentation Action**** that makes agents identify objects, which is rare to find in multiple similar environments.
39
+ ### **Part 2**: The Identification Problem<br>
40
+ **Task 2:** Our Project has an ****Identification Action**** which, though a part of the Segmentation task, is slightly different: it allows agents to segregate objects into **stackable** and **not stackable**, which will be of high importance while addressing the next problem.
41
+
42
+ ### **Part 3**: The Placement Problem<br>
43
+ **Task 3:** Our Project has a ****Placement Action**** that allows agents to place things it has found.<br>
44
+ **Task 4:** It also provides an ****Adjust Action**** for the agent to adjust things _(because no one's good in their first try ! )_
45
+
46
+ ## Technical Details
47
+ ### Reward Logic
48
+
49
+
50
+ ### Demonstration
51
+
52
+ ## Links
53
+ ****Huggingface Link**** (to run `inference.py`): https://huggingface.co/spaces/Jibrann/app <br>
54
+ ****Github Link**** (this page): https://github.com/jibcamun/Reinforcement-Learning-Object-Placement
55
+
56
+ ## Related Works
57
+ [Jumanji](https://github.com/instadeepai/jumanji)<br>
58
+ [miniRL](https://proxyapps.exascaleproject.org/app/minirl/)<br>
59
+ [BabyAI](https://arxiv.org/abs/1810.08272)<br>
client.py CHANGED
@@ -16,6 +16,7 @@ class AppEnv(EnvClient[AppAction, AppObservation, AppState]):
16
  "placement": action.placement,
17
  "isSegmentation": action.isSegmentation,
18
  "findObjects": action.findObjects,
 
19
  }
20
 
21
  def _parse_result(self, payload: Dict) -> StepResult[AppObservation]:
@@ -30,6 +31,8 @@ class AppEnv(EnvClient[AppAction, AppObservation, AppState]):
30
  isDone=obs_data.get("isDone", False),
31
  rewardFeedback=obs_data.get("rewardFeedback", []),
32
  rewardList=obs_data.get("rewardList", []),
 
 
33
  )
34
 
35
  return StepResult(
@@ -52,4 +55,6 @@ class AppEnv(EnvClient[AppAction, AppObservation, AppState]):
52
  ObjectsPresent=payload.get("ObjectsPresent", {}),
53
  rewardFeedback=payload.get("rewardFeedback", []),
54
  rewardList=payload.get("rewardList", []),
 
 
55
  )
 
16
  "placement": action.placement,
17
  "isSegmentation": action.isSegmentation,
18
  "findObjects": action.findObjects,
19
+ "adjust": action.adjust,
20
  }
21
 
22
  def _parse_result(self, payload: Dict) -> StepResult[AppObservation]:
 
31
  isDone=obs_data.get("isDone", False),
32
  rewardFeedback=obs_data.get("rewardFeedback", []),
33
  rewardList=obs_data.get("rewardList", []),
34
+ numberPlaced=obs_data.get("numberPlaced", 0),
35
+ ObjectsPlaced=obs_data.get("ObjectsPlaced", {}),
36
  )
37
 
38
  return StepResult(
 
55
  ObjectsPresent=payload.get("ObjectsPresent", {}),
56
  rewardFeedback=payload.get("rewardFeedback", []),
57
  rewardList=payload.get("rewardList", []),
58
+ numberPlaced=payload.get("numberPlaced", 0),
59
+ ObjectsPlaced=payload.get("ObjectsPlaced", {}),
60
  )
grader.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sklearn.preprocessing import MinMaxScaler
2
+ import os
3
+ from dotenv import load_dotenv
4
+ from openai import OpenAI
5
+ import json
6
+ from json import JSONDecodeError
7
+ from numpy import average
8
+
9
+
10
+ load_dotenv()
11
+
12
+ API_URL = os.getenv("API_BASE_URL")
13
+ MODEL = os.getenv("MODEL_NAME")
14
+ API_KEY = os.getenv("API_KEY") or os.getenv("HF_TOKEN")
15
+
16
+ SYSTEM_PROMPT_GRADING = """
17
+
18
+ You are a professional object sorter who works at the industry level and
19
+ has a good knowledge about how and where things are to be places, you shall receieve the
20
+ list of feedbacks from an accomplice hired, you shall rate the feedback on the scale of 0.0 to 1.0 ONLY.
21
+
22
+ Rules:
23
+ - You shall rate the feedback on the scale of 0.0 to 1.0 ONLY AND also provide a one-line feedback
24
+ - You WILL STRICTLY ABIDE BY THIS JSON FORMAT:
25
+ {
26
+ "grade": float,
27
+ "feedback": str,
28
+ }
29
+ """.strip()
30
+
31
+ TEMPERATURE = 0.2
32
+
33
+
34
+ def _feed_llm(input):
35
+ if not API_URL or not MODEL or not API_KEY:
36
+ missing = [
37
+ name
38
+ for name, value in (
39
+ ("API_BASE_URL", API_URL),
40
+ ("MODEL_NAME", MODEL),
41
+ ("API_KEY/HF_TOKEN", API_KEY),
42
+ )
43
+ if not value
44
+ ]
45
+ raise RuntimeError(
46
+ f"Missing required environment variables: {', '.join(missing)}"
47
+ )
48
+
49
+ client = OpenAI(
50
+ base_url=API_URL,
51
+ api_key=API_KEY,
52
+ )
53
+
54
+ llm_output = client.chat.completions.create(
55
+ model=MODEL,
56
+ messages=[
57
+ {"role": "system", "content": SYSTEM_PROMPT_GRADING},
58
+ {"role": "user", "content": f"{input}"},
59
+ ],
60
+ temperature=TEMPERATURE,
61
+ )
62
+
63
+ return llm_output.choices[0].message.content or ""
64
+
65
+
66
+ def _extract_json_payload(output_str: str):
67
+ output_str = output_str.strip()
68
+
69
+ if output_str.startswith("```"):
70
+ lines = output_str.splitlines()
71
+ if len(lines) >= 3:
72
+ output_str = "\n".join(lines[1:-1]).strip()
73
+
74
+ start = output_str.find("{")
75
+ end = output_str.rfind("}")
76
+
77
+ if start == -1 or end == -1 or end < start:
78
+ raise JSONDecodeError("No JSON object found in model output", output_str, 0)
79
+
80
+ return output_str[start : end + 1]
81
+
82
+
83
+ def parse_output(output_str):
84
+ data = json.loads(_extract_json_payload(output_str))
85
+ return data
86
+
87
+
88
+ def grade_segmentation(appObs):
89
+ scaler = MinMaxScaler()
90
+ reward = appObs.rewardListSegment
91
+ feedback = appObs.rewardFeedbackSegment
92
+ scaler.fit(reward)
93
+ grade = average(scaler.transform(reward))
94
+ llmOutput = parse_output(_feed_llm(f"Feedback: {feedback}, Reward: {reward}"))
95
+ outputFeedback = llmOutput.get("feedback", "")
96
+ outputGrade = llmOutput.get("grade", 0.0)
97
+ cumulativeGrade = (grade + outputGrade) / 2.0
98
+ return (grade, outputGrade, cumulativeGrade, outputFeedback)
99
+
100
+
101
+ def grade_placement(appObs):
102
+ scaler = MinMaxScaler()
103
+ reward = appObs.rewardListPlace
104
+ feedback = appObs.rewardFeedbackPlace
105
+ scaler.fit(reward)
106
+ grade = average(scaler.transform(reward))
107
+ llmOutput = parse_output(_feed_llm(f"Feedback: {feedback}, Reward: {reward}"))
108
+ outputFeedback = llmOutput.get("feedback", "")
109
+ outputGrade = llmOutput.get("grade", 0.0)
110
+ cumulativeGrade = (grade + outputGrade) / 2.0
111
+ return (grade, outputGrade, cumulativeGrade, outputFeedback)
112
+
113
+
114
+ def grade_segmentation(appObs):
115
+ scaler = MinMaxScaler()
116
+ reward = appObs.rewardListAdjust
117
+ feedback = appObs.rewardFeedbackAdjust
118
+ scaler.fit(reward)
119
+ grade = average(scaler.transform(reward))
120
+ llmOutput = parse_output(_feed_llm(f"Feedback: {feedback}, Reward: {reward}"))
121
+ outputFeedback = llmOutput.get("feedback", "")
122
+ outputGrade = llmOutput.get("grade", 0.0)
123
+ cumulativeGrade = (grade + outputGrade) / 2.0
124
+ return (grade, outputGrade, cumulativeGrade, outputFeedback)
inference.py CHANGED
@@ -3,7 +3,7 @@ from dotenv import load_dotenv
3
  from openai import OpenAI
4
  import json
5
  from json import JSONDecodeError
6
- import time
7
 
8
  try:
9
  from models import AppAction, AppObservation
@@ -15,8 +15,14 @@ try:
15
  except ImportError:
16
  from app.server.app_environment import AppEnvironment
17
 
 
 
 
 
 
18
 
19
  load_dotenv()
 
20
 
21
  API_URL = os.getenv("API_BASE_URL")
22
  MODEL = os.getenv("MODEL_NAME")
@@ -28,6 +34,7 @@ FALLBACK_ACTION = {
28
  "isSegmentation": False,
29
  "placement": {},
30
  "findObjects": {},
 
31
  }
32
 
33
  DEBUG = True
@@ -38,7 +45,8 @@ SYSTEM_PROMPT = """
38
  1. **Segment objects** in the environment if `isSegmentation=True`.
39
  2. **Identify objects** and their properties (name, stackable) accurately.
40
  3. **Place objects** in the 3D grid respecting stacking rules and dimensions.
41
- 4. **Use rewards and feedback** from previous steps to improve future actions.
 
42
 
43
  You must strictly return actions that conform to this Pydantic schema:
44
 
@@ -47,37 +55,39 @@ SYSTEM_PROMPT = """
47
  placement: Dict[str, Tuple[int, int, int, bool]]
48
  isSegmentation: bool
49
  findObjects: Dict[str, Tuple[int, int, int, bool]]
 
50
  }
51
 
52
  Rules:
53
  - Only report objects that are found or placed; empty dicts are valid if none.
54
- - Do not modify objects that are already placed unless instructed.
55
  - Coordinates must be within the grid bounds.
56
  - Respect stackable property: non-stackable objects cannot be placed on top of another object.
57
  - Use previous step’s reward and rewardFeedback to adjust your strategy.
 
58
 
59
  Output:
60
  - Always return a valid JSON object conforming to the schema.
61
  - Do not include any extra text, explanations, or commentary.
62
- - If no action is possible, return empty dicts for `placement` and `findObjects`.
63
 
64
  Your goal:
65
  - Maximize cumulative reward.
66
  - Identify all objects correctly.
67
- - Place objects efficiently while respecting stacking rules.
68
  - Learn from reward feedback to improve placement in future steps.
69
 
70
  Always return a valid JSON that conforms exactly to the AppAction Pydantic model:
71
- {"placement": Dict[str, Tuple[int,int,int,bool]] or {}, "isSegmentation": bool, "findObjects": Dict[str, Tuple[int,int,int,bool]] or {}}
72
 
73
  Actions:
74
- - To place an object: {"isSegmentation": false, "placement": {"object_name": [x, y, z, stackable]}, "findObjects": {}}
75
- - To segment objects: {"isSegmentation": true, "placement": {}, "findObjects": {"object_name": [x, y, z, stackable]}}
 
 
76
 
77
  Do not include explanations, text, or extra fields.
78
- If no objects are found or placed, return empty dicts for placement and findObjects.
79
- The output must be parseable and valid for AppAction(**json_output).
80
- """.strip()
81
 
82
  MESSAGES = [{"role": "system", "content": SYSTEM_PROMPT}]
83
  HISTORY = []
@@ -125,7 +135,9 @@ def main() -> None:
125
  )
126
  if not value
127
  ]
128
- raise RuntimeError(f"Missing required environment variables: {', '.join(missing)}")
 
 
129
 
130
  env = AppEnvironment()
131
  observation: AppObservation = env.reset()
@@ -148,22 +160,33 @@ def main() -> None:
148
 
149
  llm_output = client.chat.completions.create(
150
  model=MODEL,
151
- messages=MESSAGES,
 
 
 
 
 
 
 
 
 
 
152
  temperature=TEMPERATURE,
153
  )
154
 
155
  message_content = llm_output.choices[0].message.content or ""
 
156
  action: AppAction = parse_output(message_content)
157
- MESSAGES.append({"role": "assistant", "content": message_content})
158
  observation: AppObservation = env.step(action)
159
 
 
 
160
  HISTORY.append(observation)
 
161
 
162
  if observation.isDone:
163
  break
164
-
165
- time.sleep(100)
166
-
167
  print(HISTORY)
168
 
169
 
 
3
  from openai import OpenAI
4
  import json
5
  from json import JSONDecodeError
6
+ from numpy import set_printoptions
7
 
8
  try:
9
  from models import AppAction, AppObservation
 
15
  except ImportError:
16
  from app.server.app_environment import AppEnvironment
17
 
18
+ try:
19
+ from grader import *
20
+ except ImportError:
21
+ from app.grader import *
22
+
23
 
24
  load_dotenv()
25
+ set_printoptions(precision=2, suppress=True)
26
 
27
  API_URL = os.getenv("API_BASE_URL")
28
  MODEL = os.getenv("MODEL_NAME")
 
34
  "isSegmentation": False,
35
  "placement": {},
36
  "findObjects": {},
37
+ "adjust": ("", "", 0),
38
  }
39
 
40
  DEBUG = True
 
45
  1. **Segment objects** in the environment if `isSegmentation=True`.
46
  2. **Identify objects** and their properties (name, stackable) accurately.
47
  3. **Place objects** in the 3D grid respecting stacking rules and dimensions.
48
+ 4. **Adjust object positions** if necessary to optimize placement and maximize rewards.
49
+ 5. **Use rewards and feedback** from previous steps to improve future actions.
50
 
51
  You must strictly return actions that conform to this Pydantic schema:
52
 
 
55
  placement: Dict[str, Tuple[int, int, int, bool]]
56
  isSegmentation: bool
57
  findObjects: Dict[str, Tuple[int, int, int, bool]]
58
+ adjust : Tuple[str, str, int]
59
  }
60
 
61
  Rules:
62
  - Only report objects that are found or placed; empty dicts are valid if none.
 
63
  - Coordinates must be within the grid bounds.
64
  - Respect stackable property: non-stackable objects cannot be placed on top of another object.
65
  - Use previous step’s reward and rewardFeedback to adjust your strategy.
66
+ - Directions for adjustments for an object can be "UP", "DOWN", "LEFT", "RIGHT", "FORWARD", "BACKWARD", "ROTATE" with a positive integer amount.
67
 
68
  Output:
69
  - Always return a valid JSON object conforming to the schema.
70
  - Do not include any extra text, explanations, or commentary.
71
+ - If no action is possible, return empty dicts for `placement` and `findObjects` and an empty tuple for `adjust`.
72
 
73
  Your goal:
74
  - Maximize cumulative reward.
75
  - Identify all objects correctly.
76
+ - Place objects efficiently while respecting stacking rules (PS: Do not place the objects in the same location as where it is originally found and use adjust function wherever required.)
77
  - Learn from reward feedback to improve placement in future steps.
78
 
79
  Always return a valid JSON that conforms exactly to the AppAction Pydantic model:
80
+ {"placement": Dict[str, Tuple[int,int,int,bool]] or {}, "isSegmentation": bool, "findObjects": Dict[str, Tuple[int,int,int,bool]] or {},"adjust": Tuple[str,str,int] or ("", "", 0)}
81
 
82
  Actions:
83
+ - To place an object: {"isSegmentation": false, "placement": {"object_name": [x, y, z, stackable]}, "findObjects": {}, "adjust":("", "", 0)}
84
+ - To segment objects: {"isSegmentation": true, "placement": {}, "findObjects": {"object_name": [x, y, z, stackable]}, "adjust":("", "", 0)}
85
+ - To adjust objects: {"isSegmentation": false, "placement": {}, "findObjects": {}, "adjust":("object_name", "direction", amount)}
86
+ - To adjust and place objects: {"isSegmentation": false, "placement": {"object_name": [x, y, z, stackable]}, "findObjects": {}, "adjust":("object_name", "direction", amount)}
87
 
88
  Do not include explanations, text, or extra fields.
89
+ If no objects are found, placed or adjusted, return empty dicts for placement and findObjects and empty tuple for adjust.
90
+ The output must be parseable and valid for AppAction(**json_output).""".strip()
 
91
 
92
  MESSAGES = [{"role": "system", "content": SYSTEM_PROMPT}]
93
  HISTORY = []
 
135
  )
136
  if not value
137
  ]
138
+ raise RuntimeError(
139
+ f"Missing required environment variables: {', '.join(missing)}"
140
+ )
141
 
142
  env = AppEnvironment()
143
  observation: AppObservation = env.reset()
 
160
 
161
  llm_output = client.chat.completions.create(
162
  model=MODEL,
163
+ messages=[
164
+ MESSAGES[0],
165
+ {
166
+ "role": "user",
167
+ "content": f"""Observation: {observation.model_dump_json()},
168
+ Previous reward: {observation.reward},
169
+ Previous reward list: {observation.rewardList},
170
+ Previous reward feedback: {observation.rewardFeedback},
171
+ Step: {i}""".strip(),
172
+ },
173
+ ],
174
  temperature=TEMPERATURE,
175
  )
176
 
177
  message_content = llm_output.choices[0].message.content or ""
178
+
179
  action: AppAction = parse_output(message_content)
 
180
  observation: AppObservation = env.step(action)
181
 
182
+ MESSAGES.append({"role": "assistant", "content": message_content})
183
+ print(message_content)
184
  HISTORY.append(observation)
185
+ print(observation)
186
 
187
  if observation.isDone:
188
  break
189
+
 
 
190
  print(HISTORY)
191
 
192
 
models.py CHANGED
@@ -9,13 +9,20 @@ class AppAction(Action):
9
  placement: Dict[str, Tuple[int, int, int, bool]] = Field(
10
  default_factory=dict, description="Placement of the object in a 3D grid"
11
  )
 
12
  isSegmentation: bool = Field(
13
  default=True, description="Whether the model is segmenting the objects"
14
  )
 
15
  findObjects: Dict[str, Tuple[int, int, int, bool]] = Field(
16
  default_factory=dict, description="Dictionary of objects"
17
  )
18
 
 
 
 
 
 
19
 
20
  class AppObservation(Observation):
21
  """Observation from the App environment"""
@@ -24,21 +31,26 @@ class AppObservation(Observation):
24
  default_factory=list,
25
  description="Current placement of the objects in a 3D grid",
26
  )
 
27
  positions: Dict[str, Tuple[int, int, int, bool]] = Field(
28
  default_factory=dict,
29
  description="Dictionary of objects with their positions in the environment",
30
  )
 
31
  objectsLeft: List[str] = Field(
32
  default_factory=list,
33
  description="List of unorganised objects left in the environment",
34
  )
 
35
  objectsFound: List[str] = Field(
36
  default_factory=list,
37
  description="List of objects found in the environment",
38
  )
 
39
  reward: float = Field(
40
  default=0.0, description="Reward received after taking the action"
41
  )
 
42
  isDone: bool = Field(default=False, description="Whether the episode has ended")
43
 
44
  rewardFeedback: list[str] = Field(
@@ -51,6 +63,46 @@ class AppObservation(Observation):
51
  description="List of reward values received after taking the action",
52
  )
53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
  class AppState(State):
56
  """State for the App environment"""
@@ -69,13 +121,16 @@ class AppState(State):
69
  default_factory=list,
70
  description="List of unorganised objects left in the environment",
71
  )
 
72
  objectsFound: List[str] = Field(
73
  default_factory=list,
74
  description="List of objects found in the environment",
75
  )
 
76
  reward: float = Field(
77
  default=0.0, description="Reward received after taking the action"
78
  )
 
79
  isDone: bool = Field(default=False, description="Whether the episode has ended")
80
 
81
  ObjectsPresent: Dict[str, Tuple[int, int, int, bool]] = Field(
@@ -83,6 +138,11 @@ class AppState(State):
83
  description="Placed objects and their current positions in the environment",
84
  )
85
 
 
 
 
 
 
86
  rewardFeedback: list[str] = Field(
87
  default_factory=list,
88
  description="List of feedback strings describing the reward received after taking the action",
@@ -92,3 +152,38 @@ class AppState(State):
92
  default_factory=list,
93
  description="List of reward values received after taking the action",
94
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  placement: Dict[str, Tuple[int, int, int, bool]] = Field(
10
  default_factory=dict, description="Placement of the object in a 3D grid"
11
  )
12
+
13
  isSegmentation: bool = Field(
14
  default=True, description="Whether the model is segmenting the objects"
15
  )
16
+
17
  findObjects: Dict[str, Tuple[int, int, int, bool]] = Field(
18
  default_factory=dict, description="Dictionary of objects"
19
  )
20
 
21
+ adjust: Tuple[str, str, int] = Field(
22
+ default=("", "", 0),
23
+ description="Adjustment action for moving or rotating objects. Format: (object_name, direction, amount)",
24
+ )
25
+
26
 
27
  class AppObservation(Observation):
28
  """Observation from the App environment"""
 
31
  default_factory=list,
32
  description="Current placement of the objects in a 3D grid",
33
  )
34
+
35
  positions: Dict[str, Tuple[int, int, int, bool]] = Field(
36
  default_factory=dict,
37
  description="Dictionary of objects with their positions in the environment",
38
  )
39
+
40
  objectsLeft: List[str] = Field(
41
  default_factory=list,
42
  description="List of unorganised objects left in the environment",
43
  )
44
+
45
  objectsFound: List[str] = Field(
46
  default_factory=list,
47
  description="List of objects found in the environment",
48
  )
49
+
50
  reward: float = Field(
51
  default=0.0, description="Reward received after taking the action"
52
  )
53
+
54
  isDone: bool = Field(default=False, description="Whether the episode has ended")
55
 
56
  rewardFeedback: list[str] = Field(
 
63
  description="List of reward values received after taking the action",
64
  )
65
 
66
+ numberPlaced: int = Field(
67
+ default=0,
68
+ description="Number of objects successfully placed in the environment",
69
+ )
70
+
71
+ ObjectsPlaced: Dict[str, Tuple[int, int, int, bool]] = Field(
72
+ default_factory=dict,
73
+ description="Objects that have been successfully placed in the environment",
74
+ )
75
+
76
+ rewardListSegment: list[float] = Field(
77
+ default_factory=list,
78
+ description="List of reward values received after taking the action",
79
+ )
80
+
81
+ rewardFeedbackSegment: list[str] = Field(
82
+ default_factory=list,
83
+ description="List of feedback strings describing the reward received after taking the action",
84
+ )
85
+
86
+ rewardListPlace: list[float] = Field(
87
+ default_factory=list,
88
+ description="List of feedback strings describing the reward received after taking the action",
89
+ )
90
+
91
+ rewardFeedbackPlace: list[str] = Field(
92
+ default_factory=list,
93
+ description="List of feedback strings describing the reward received after taking the action",
94
+ )
95
+
96
+ rewardListAdjust: list[float] = Field(
97
+ default_factory=list,
98
+ description="List of feedback strings describing the reward received after taking the action",
99
+ )
100
+
101
+ rewardFeedbackAdjust: list[str] = Field(
102
+ default_factory=list,
103
+ description="List of feedback strings describing the reward received after taking the action",
104
+ )
105
+
106
 
107
  class AppState(State):
108
  """State for the App environment"""
 
121
  default_factory=list,
122
  description="List of unorganised objects left in the environment",
123
  )
124
+
125
  objectsFound: List[str] = Field(
126
  default_factory=list,
127
  description="List of objects found in the environment",
128
  )
129
+
130
  reward: float = Field(
131
  default=0.0, description="Reward received after taking the action"
132
  )
133
+
134
  isDone: bool = Field(default=False, description="Whether the episode has ended")
135
 
136
  ObjectsPresent: Dict[str, Tuple[int, int, int, bool]] = Field(
 
138
  description="Placed objects and their current positions in the environment",
139
  )
140
 
141
+ ObjectsPlaced: Dict[str, Tuple[int, int, int, bool]] = Field(
142
+ default_factory=dict,
143
+ description="Objects that have been successfully placed in the environment",
144
+ )
145
+
146
  rewardFeedback: list[str] = Field(
147
  default_factory=list,
148
  description="List of feedback strings describing the reward received after taking the action",
 
152
  default_factory=list,
153
  description="List of reward values received after taking the action",
154
  )
155
+
156
+ numberPlaced: int = Field(
157
+ default=0,
158
+ description="Number of objects successfully placed in the environment",
159
+ )
160
+
161
+ rewardListSegment: list[float] = Field(
162
+ default_factory=list,
163
+ description="List of reward values received after taking the action",
164
+ )
165
+
166
+ rewardFeedbackSegment: list[str] = Field(
167
+ default_factory=list,
168
+ description="List of feedback strings describing the reward received after taking the action",
169
+ )
170
+
171
+ rewardListPlace: list[float] = Field(
172
+ default_factory=list,
173
+ description="List of feedback strings describing the reward received after taking the action",
174
+ )
175
+
176
+ rewardFeedbackPlace: list[str] = Field(
177
+ default_factory=list,
178
+ description="List of feedback strings describing the reward received after taking the action",
179
+ )
180
+
181
+ rewardListAdjust: list[float] = Field(
182
+ default_factory=list,
183
+ description="List of feedback strings describing the reward received after taking the action",
184
+ )
185
+
186
+ rewardFeedbackAdjust: list[str] = Field(
187
+ default_factory=list,
188
+ description="List of feedback strings describing the reward received after taking the action",
189
+ )
server/app.py CHANGED
@@ -1,6 +1,6 @@
1
  try:
2
  from openenv.core.env_server.http_server import create_app
3
- except Exception as e: # pragma: no cover
4
  raise ImportError(
5
  "openenv is required for the web interface. Install dependencies with '\n uv sync\n'"
6
  ) from e
@@ -18,21 +18,21 @@ app = create_app(
18
  AppAction,
19
  AppObservation,
20
  env_name="app",
21
- max_concurrent_envs=1, # increase this number to allow more concurrent WebSocket sessions
22
  )
23
 
24
 
25
- @app.get("/health")
26
- def health() -> dict[str, str]:
27
- return {"status": "ok"}
28
-
29
-
30
- @app.get("/")
31
- def root() -> dict[str, str]:
32
- return {
33
- "message": "Object Placer API is running",
34
- "health": "/health",
35
- }
36
 
37
 
38
  def main(host: str = "0.0.0.0", port: int = 8000):
 
1
  try:
2
  from openenv.core.env_server.http_server import create_app
3
+ except Exception as e:
4
  raise ImportError(
5
  "openenv is required for the web interface. Install dependencies with '\n uv sync\n'"
6
  ) from e
 
18
  AppAction,
19
  AppObservation,
20
  env_name="app",
21
+ max_concurrent_envs=1,
22
  )
23
 
24
 
25
+ # @app.get("/health")
26
+ # def health() -> dict[str, str]:
27
+ # return {"status": "ok"}
28
+ #
29
+ #
30
+ # @app.get("/")
31
+ # def root() -> dict[str, str]:
32
+ # return {
33
+ # "message": "Object Placer API is running",
34
+ # "health": "/health",
35
+ # }
36
 
37
 
38
  def main(host: str = "0.0.0.0", port: int = 8000):
server/app_environment.py CHANGED
@@ -22,6 +22,7 @@ class AppEnvironment(Environment):
22
  self._reset_count = 0
23
 
24
  def _coerce_state(self) -> AppState:
 
25
  if isinstance(self._state, AppState):
26
  return self._state
27
 
@@ -46,8 +47,16 @@ class AppEnvironment(Environment):
46
  reward=0.0,
47
  isDone=False,
48
  ObjectsPresent=placed,
 
49
  rewardFeedback=[],
50
  rewardList=[],
 
 
 
 
 
 
 
51
  )
52
 
53
  def reset(self) -> AppObservation:
@@ -62,6 +71,14 @@ class AppEnvironment(Environment):
62
  isDone=self._state.isDone,
63
  rewardFeedback=self._state.rewardFeedback,
64
  rewardList=self._state.rewardList,
 
 
 
 
 
 
 
 
65
  )
66
 
67
  def step(self, action: AppAction) -> AppObservation:
@@ -77,6 +94,7 @@ class AppEnvironment(Environment):
77
  reward -= 10.0
78
  appendRewardFeedback(
79
  state,
 
80
  "No action is of invalid schema or format. Penalty applied.",
81
  reward,
82
  )
@@ -89,24 +107,51 @@ class AppEnvironment(Environment):
89
  isDone=state.isDone,
90
  rewardFeedback=state.rewardFeedback,
91
  rewardList=state.rewardList,
 
 
 
 
 
 
 
 
92
  )
93
 
94
  if action.isSegmentation and action is not None:
95
  reward += 10.0
96
- appendRewardFeedback(state, "Segmentation successful.", reward)
97
 
98
  if action.placement and action is not None:
99
- reward += place(action.isSegmentation, action.placement, state)
100
- appendRewardFeedback(state, "Object placed successfully.", reward)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
 
102
  if action.findObjects and action is not None:
103
  reward += findobject(action.isSegmentation, action.findObjects, state)
104
- appendRewardFeedback(state, "Object found successfully.", reward)
105
 
106
- if len(state.objectsLeft) == 0:
 
 
 
107
  state.isDone = True
108
  reward += 10.0
109
- appendRewardFeedback(state, "All objects found. Episode completed!", reward)
 
 
110
 
111
  state.reward += reward / (10**state.step_count)
112
 
@@ -119,6 +164,14 @@ class AppEnvironment(Environment):
119
  isDone=state.isDone,
120
  rewardFeedback=state.rewardFeedback,
121
  rewardList=state.rewardList,
 
 
 
 
 
 
 
 
122
  )
123
 
124
  @property
 
22
  self._reset_count = 0
23
 
24
  def _coerce_state(self) -> AppState:
25
+
26
  if isinstance(self._state, AppState):
27
  return self._state
28
 
 
47
  reward=0.0,
48
  isDone=False,
49
  ObjectsPresent=placed,
50
+ ObjectsPlaced={},
51
  rewardFeedback=[],
52
  rewardList=[],
53
+ numberPlaced=0,
54
+ rewardListSegment=[],
55
+ rewardFeedbackSegment=[],
56
+ rewardListPlace=[],
57
+ rewardFeedbackPlace=[],
58
+ rewardListAdjust=[],
59
+ rewardFeedbackAdjust=[],
60
  )
61
 
62
  def reset(self) -> AppObservation:
 
71
  isDone=self._state.isDone,
72
  rewardFeedback=self._state.rewardFeedback,
73
  rewardList=self._state.rewardList,
74
+ numberPlaced=self._state.numberPlaced,
75
+ ObjectsPlaced=self._state.ObjectsPlaced,
76
+ rewardListSegment=self._state.rewardListSegment,
77
+ rewardFeedbackSegment=self._state.rewardFeedbackSegment,
78
+ rewardListPlace=self._state.rewardListPlace,
79
+ rewardFeedbackPlace=self._state.rewardFeedbackPlace,
80
+ rewardListAdjust=self._state.rewardListAdjust,
81
+ rewardFeedbackAdjust=self._state.rewardFeedbackAdjust,
82
  )
83
 
84
  def step(self, action: AppAction) -> AppObservation:
 
94
  reward -= 10.0
95
  appendRewardFeedback(
96
  state,
97
+ "",
98
  "No action is of invalid schema or format. Penalty applied.",
99
  reward,
100
  )
 
107
  isDone=state.isDone,
108
  rewardFeedback=state.rewardFeedback,
109
  rewardList=state.rewardList,
110
+ numberPlaced=state.numberPlaced,
111
+ ObjectsPlaced=state.ObjectsPlaced,
112
+ rewardListSegment=state.rewardListSegment,
113
+ rewardFeedbackSegment=state.rewardFeedbackSegment,
114
+ rewardListPlace=state.rewardListPlace,
115
+ rewardFeedbackPlace=state.rewardFeedbackPlace,
116
+ rewardListAdjust=state.rewardListAdjust,
117
+ rewardFeedbackAdjust=state.rewardFeedbackAdjust,
118
  )
119
 
120
  if action.isSegmentation and action is not None:
121
  reward += 10.0
122
+ appendRewardFeedback(state, "segment", "Segmentation successful.", reward)
123
 
124
  if action.placement and action is not None:
125
+ placement_reward, placement_failed = place(
126
+ action.isSegmentation, action.placement, state
127
+ )
128
+ reward += placement_reward
129
+ if placement_failed:
130
+ appendRewardFeedback(state, "place", "Failed to place object.", reward)
131
+ else:
132
+ appendRewardFeedback(
133
+ state, "place", "Object placed successfully.", reward
134
+ )
135
+
136
+ if action.adjust and action is not None:
137
+ reward += adjustment(action.isSegmentation, action.adjust, state)
138
+ appendRewardFeedback(
139
+ state, "adjust", "Object adjusted successfully.", reward
140
+ )
141
 
142
  if action.findObjects and action is not None:
143
  reward += findobject(action.isSegmentation, action.findObjects, state)
144
+ appendRewardFeedback(state, "segment", "Object found successfully.", reward)
145
 
146
+ if (
147
+ len(state.objectsLeft) == 0
148
+ and len(state.ObjectsPresent) == state.numberPlaced
149
+ ):
150
  state.isDone = True
151
  reward += 10.0
152
+ appendRewardFeedback(
153
+ state, "segment", "All objects found. Episode completed!", reward
154
+ )
155
 
156
  state.reward += reward / (10**state.step_count)
157
 
 
164
  isDone=state.isDone,
165
  rewardFeedback=state.rewardFeedback,
166
  rewardList=state.rewardList,
167
+ numberPlaced=state.numberPlaced,
168
+ ObjectsPlaced=state.ObjectsPlaced,
169
+ rewardListSegment=state.rewardListSegment,
170
+ rewardFeedbackSegment=state.rewardFeedbackSegment,
171
+ rewardListPlace=state.rewardListPlace,
172
+ rewardFeedbackPlace=state.rewardFeedbackPlace,
173
+ rewardListAdjust=state.rewardListAdjust,
174
+ rewardFeedbackAdjust=state.rewardFeedbackAdjust,
175
  )
176
 
177
  @property
utils.py CHANGED
@@ -47,10 +47,29 @@ OBJECT_NAMES = [
47
  "pouch",
48
  ]
49
 
 
 
 
 
 
 
 
 
 
50
 
51
- def appendRewardFeedback(state, feedback, reward):
52
- state.rewardFeedback.append(feedback)
53
  state.rewardList.append(reward)
 
 
 
 
 
 
 
 
 
 
54
 
55
 
56
  def initDimentions(obj):
@@ -124,7 +143,7 @@ def initGrid():
124
 
125
  def initWeightedGrid(shape=None):
126
  if shape is None:
127
- shape = (randint(5, 11), randint(5, 11), randint(5, 11))
128
 
129
  grid = random.uniform(0, 1, shape)
130
 
@@ -157,21 +176,27 @@ def _get_weight_value(weight, x, y, z):
157
  def place(segment, objects, state):
158
  dims = state.currentGrid
159
  weight = state.weightedGrid
 
 
160
  reward = 0.0
161
  totalObjs = len(objects)
162
  reward_per_obj_placed = 45.0 / totalObjs
163
 
164
- if segment or segment is None:
165
  appendRewardFeedback(
166
- state, "Placing objects without segmentation is not allowed.", -60.0
167
  )
168
  return -60.0
169
 
170
  for obj_name, pos in objects.items():
 
171
  obj = OBJECTS.get(obj_name)
172
  if obj is None:
173
  appendRewardFeedback(
174
- state, f"Object '{obj_name}' is not recognized.", -reward_per_obj_placed
 
 
 
175
  )
176
  reward -= reward_per_obj_placed
177
  continue
@@ -190,6 +215,7 @@ def place(segment, objects, state):
190
  reward -= reward_per_obj_placed
191
  appendRewardFeedback(
192
  state,
 
193
  f"Object '{obj_name}' placement is out of bounds.",
194
  -reward_per_obj_placed,
195
  )
@@ -200,6 +226,7 @@ def place(segment, objects, state):
200
  reward -= reward_per_obj_placed
201
  appendRewardFeedback(
202
  state,
 
203
  f"Object '{obj_name}' placement overlaps with another object and stacking is not allowed.",
204
  -reward_per_obj_placed,
205
  )
@@ -223,6 +250,7 @@ def place(segment, objects, state):
223
  reward += bonus
224
  appendRewardFeedback(
225
  state,
 
226
  f"Object '{obj_name}' placed with stacking. Bonus: {bonus:.2f}",
227
  bonus,
228
  )
@@ -230,6 +258,7 @@ def place(segment, objects, state):
230
  reward -= reward_per_obj_placed
231
  appendRewardFeedback(
232
  state,
 
233
  f"Object '{obj_name}' placement failed. No space for stacking.",
234
  -reward_per_obj_placed,
235
  )
@@ -245,6 +274,7 @@ def place(segment, objects, state):
245
  reward += bonus
246
  appendRewardFeedback(
247
  state,
 
248
  f"Object '{obj_name}' placed successfully. Bonus: {bonus:.2f}",
249
  bonus,
250
  )
@@ -254,16 +284,48 @@ def place(segment, objects, state):
254
  break
255
 
256
  if not placement_failed:
257
- state.ObjectsPresent[obj_name] = pos
258
-
259
- return reward
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
260
 
261
 
262
  def findobject(segment, objects, state):
263
 
264
  if not segment or segment is None:
265
  appendRewardFeedback(
266
- state, "Finding objects without segmentation is not allowed.", -60.0
 
 
 
 
 
 
 
 
 
 
 
 
267
  )
268
  return -60.0
269
 
@@ -275,7 +337,10 @@ def findobject(segment, objects, state):
275
  if pos_real is None:
276
  reward -= glMetric
277
  appendRewardFeedback(
278
- state, f"Object '{obj_found}' not found in the environment.", -glMetric
 
 
 
279
  )
280
  continue
281
 
@@ -283,6 +348,7 @@ def findobject(segment, objects, state):
283
  reward += glMetric
284
  appendRewardFeedback(
285
  state,
 
286
  f"Object '{obj_found}' found with correct position and stacking.",
287
  glMetric,
288
  )
@@ -292,6 +358,7 @@ def findobject(segment, objects, state):
292
  reward -= mse
293
  appendRewardFeedback(
294
  state,
 
295
  f"Object '{obj_found}' found with incorrect position. MSE: {mse:.2f}",
296
  -mse,
297
  )
@@ -300,6 +367,7 @@ def findobject(segment, objects, state):
300
  reward -= glMetric / 4.0
301
  appendRewardFeedback(
302
  state,
 
303
  f"Object '{obj_found}' found with incorrect stacking. Penalty: {glMetric / 4.0}",
304
  -glMetric / 4.0,
305
  )
@@ -307,6 +375,7 @@ def findobject(segment, objects, state):
307
  reward += glMetric / 4.0
308
  appendRewardFeedback(
309
  state,
 
310
  f"Object '{obj_found}' found with correct stacking. Bonus: {glMetric / 4.0}",
311
  glMetric / 4.0,
312
  )
@@ -316,3 +385,96 @@ def findobject(segment, objects, state):
316
  state.objectsFound.append(obj)
317
 
318
  return reward
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  "pouch",
48
  ]
49
 
50
# Unit displacement vectors [dx, dy, dz] for each supported adjustment
# direction. "ROTATE" carries no translation ([0, 0, 0]); the rotation
# itself is applied elsewhere by swapping the x/y coordinates of the
# object's position rather than translating it.
ACTION_CONFIG = {
    "RIGHT": [1, 0, 0],
    "LEFT": [-1, 0, 0],
    "UP": [0, 1, 0],
    "DOWN": [0, -1, 0],
    "FORWARD": [0, 0, 1],
    "BACKWARD": [0, 0, -1],
    "ROTATE": [0, 0, 0],
}
59
 
60
+
61
def appendRewardFeedback(state, choice, feedback, reward):
    """Record one reward/feedback pair on the state.

    The pair is always appended to the global ``rewardList`` /
    ``rewardFeedback`` lists; when ``choice`` names one of the known action
    categories ("segment", "place", "adjust") it is additionally appended to
    that category's dedicated lists. Unknown choices only hit the global
    lists.
    """
    state.rewardList.append(reward)
    state.rewardFeedback.append(feedback)
    # Map the category name onto the attribute suffix of its per-category lists.
    suffix = {"segment": "Segment", "place": "Place", "adjust": "Adjust"}.get(choice)
    if suffix is not None:
        getattr(state, "rewardFeedback" + suffix).append(feedback)
        getattr(state, "rewardList" + suffix).append(reward)
73
 
74
 
75
  def initDimentions(obj):
 
143
 
144
  def initWeightedGrid(shape=None):
145
  if shape is None:
146
+ shape = (randint(8, 12), randint(8, 12), randint(8, 12))
147
 
148
  grid = random.uniform(0, 1, shape)
149
 
 
176
  def place(segment, objects, state):
177
  dims = state.currentGrid
178
  weight = state.weightedGrid
179
+ objsPresent = state.ObjectsPresent
180
+
181
  reward = 0.0
182
  totalObjs = len(objects)
183
  reward_per_obj_placed = 45.0 / totalObjs
184
 
185
+ if segment:
186
  appendRewardFeedback(
187
+ state, "place", "Placing objects with segmentation is not allowed.", -60.0
188
  )
189
  return -60.0
190
 
191
  for obj_name, pos in objects.items():
192
+
193
  obj = OBJECTS.get(obj_name)
194
  if obj is None:
195
  appendRewardFeedback(
196
+ state,
197
+ "place",
198
+ f"Object '{obj_name}' is not recognized.",
199
+ -reward_per_obj_placed,
200
  )
201
  reward -= reward_per_obj_placed
202
  continue
 
215
  reward -= reward_per_obj_placed
216
  appendRewardFeedback(
217
  state,
218
+ "place",
219
  f"Object '{obj_name}' placement is out of bounds.",
220
  -reward_per_obj_placed,
221
  )
 
226
  reward -= reward_per_obj_placed
227
  appendRewardFeedback(
228
  state,
229
+ "place",
230
  f"Object '{obj_name}' placement overlaps with another object and stacking is not allowed.",
231
  -reward_per_obj_placed,
232
  )
 
250
  reward += bonus
251
  appendRewardFeedback(
252
  state,
253
+ "place",
254
  f"Object '{obj_name}' placed with stacking. Bonus: {bonus:.2f}",
255
  bonus,
256
  )
 
258
  reward -= reward_per_obj_placed
259
  appendRewardFeedback(
260
  state,
261
+ "place",
262
  f"Object '{obj_name}' placement failed. No space for stacking.",
263
  -reward_per_obj_placed,
264
  )
 
274
  reward += bonus
275
  appendRewardFeedback(
276
  state,
277
+ "place",
278
  f"Object '{obj_name}' placed successfully. Bonus: {bonus:.2f}",
279
  bonus,
280
  )
 
284
  break
285
 
286
  if not placement_failed:
287
+ state.ObjectsPlaced[obj_name] = pos
288
+ state.numberPlaced += 1
289
+ try:
290
+ if objsPresent[obj_name] == state.ObjectsPlaced[obj_name]:
291
+ reward -= 45.0 / totalObjs
292
+ appendRewardFeedback(
293
+ state,
294
+ "place",
295
+ f"Object '{obj_name}' is being placed in the same location",
296
+ -reward_per_obj_placed,
297
+ )
298
+ except KeyError:
299
+ reward -= reward_per_obj_placed
300
+ appendRewardFeedback(
301
+ state,
302
+ "place",
303
+ f"Object '{obj_name}' is present in the environment, but is placed in same location as originally found.",
304
+ -reward_per_obj_placed,
305
+ )
306
+
307
+ continue
308
+
309
+ return (reward, placement_failed)
310
 
311
 
312
  def findobject(segment, objects, state):
313
 
314
  if not segment or segment is None:
315
  appendRewardFeedback(
316
+ state,
317
+ "segment",
318
+ "Finding objects without segmentation is not allowed.",
319
+ -60.0,
320
+ )
321
+ return -60.0
322
+
323
+ if state.ObjectsPresent == state.objectsFound:
324
+ appendRewardFeedback(
325
+ state,
326
+ "segment",
327
+ "No point in finding more objects as all are already found Make the IsSegement attribute false and execute the place method.",
328
+ -60.0,
329
  )
330
  return -60.0
331
 
 
337
  if pos_real is None:
338
  reward -= glMetric
339
  appendRewardFeedback(
340
+ state,
341
+ "segment",
342
+ f"Object '{obj_found}' not found in the environment.",
343
+ -glMetric,
344
  )
345
  continue
346
 
 
348
  reward += glMetric
349
  appendRewardFeedback(
350
  state,
351
+ "segment",
352
  f"Object '{obj_found}' found with correct position and stacking.",
353
  glMetric,
354
  )
 
358
  reward -= mse
359
  appendRewardFeedback(
360
  state,
361
+ "segment",
362
  f"Object '{obj_found}' found with incorrect position. MSE: {mse:.2f}",
363
  -mse,
364
  )
 
367
  reward -= glMetric / 4.0
368
  appendRewardFeedback(
369
  state,
370
+ "segment",
371
  f"Object '{obj_found}' found with incorrect stacking. Penalty: {glMetric / 4.0}",
372
  -glMetric / 4.0,
373
  )
 
375
  reward += glMetric / 4.0
376
  appendRewardFeedback(
377
  state,
378
+ "segment",
379
  f"Object '{obj_found}' found with correct stacking. Bonus: {glMetric / 4.0}",
380
  glMetric / 4.0,
381
  )
 
385
  state.objectsFound.append(obj)
386
 
387
  return reward
388
+
389
+
390
def _remove_object(state, obj_name):
    """Remove a previously-placed object from the occupancy grid.

    Pops ``obj_name`` from ``state.ObjectsPlaced``, decrements
    ``state.numberPlaced`` and clears the object's footprint from
    ``state.currentGrid``. Returns the (non-positive) reward delta:
    a penalty when the object was never placed, otherwise ``0.0``.
    """
    reward = 0.0
    try:
        pos = state.ObjectsPlaced.pop(obj_name)
    except KeyError:
        reward -= 45.0 / len(state.ObjectsPresent)
        appendRewardFeedback(
            state,
            "adjust",
            f"Object '{obj_name}' is not placed in the environment.",
            # BUG FIX: was -reward, which logged a *positive* value for a
            # penalty; every other call site records penalties as negative.
            reward,
        )
        return reward

    state.numberPlaced -= 1
    dims = state.currentGrid
    # NOTE(review): OBJECTS.get returns None for an unknown name, which would
    # crash initDimentions — assumed unreachable because only placed
    # (i.e. previously validated) objects reach this point; confirm.
    obj = OBJECTS.get(obj_name)
    objGrid = initDimentions(obj)

    # Decrement occupancy for every cell covered by the object's footprint.
    for i in range(len(objGrid)):
        for j in range(len(objGrid[0])):
            for k in range(len(objGrid[0][0])):
                if dims[pos[0] + i][pos[1] + j][pos[2] + k] > 0:
                    dims[pos[0] + i][pos[1] + j][pos[2] + k] -= 1

    # Consistent return type: 0.0 on success (was implicitly None).
    return reward
414
+
415
+
416
def _adjustment_helper(state, name, pos, change, direction):
    # Move an already-placed object one step in `direction` (or rotate it)
    # by removing it from the grid and re-placing it at the new position.
    _remove_object(state, name)

    if direction == "ROTATE":
        # Rotation swaps the x and y coordinates; the z coordinate and the
        # 4th component of pos (presumably a stacking flag — TODO confirm)
        # are left untouched.
        newPos = (pos[1], pos[0], pos[2], pos[3])
    else:
        # Translate by the unit vector looked up from ACTION_CONFIG.
        newPos = (pos[0] + change[0], pos[1] + change[1], pos[2] + change[2], pos[3])

    # place() returns (reward, placement_failed); segment=False because the
    # adjust action is only legal outside segmentation mode.
    reward, isNotPlaced = place(False, {name: newPos}, state)

    if isNotPlaced:
        # Revert: re-place the object at its original position. The revert
        # placement's own reward is negated and used as the failure penalty.
        dummyReward = place(False, {name: pos}, state)[0]
        appendRewardFeedback(
            state,
            "adjust",
            f"Failed to adjust object '{name}' in direction {direction}. Reverting to original position.",
            -dummyReward,
        )
        return -dummyReward

    appendRewardFeedback(
        state,
        "adjust",
        f"Object '{name}' moved {direction} successfully.",
        reward,
    )
    return reward
443
+
444
+
445
def adjustment(segment, action, state):
    """Handle an adjust action: move or rotate an already-placed object.

    Parameters
    ----------
    segment : truthy when the agent is in segmentation mode — adjusting is
        then forbidden and a flat -60.0 penalty is returned.
    action : indexable pair where ``action[0]`` is the object name and
        ``action[1]`` is a direction key of ``ACTION_CONFIG``.
    state : environment state carrying ``ObjectsPlaced`` / ``ObjectsPresent``.

    Returns the reward delta for this adjustment.
    """
    objsPlaced = state.ObjectsPlaced

    if segment:
        appendRewardFeedback(
            state,
            "adjust",
            # BUG FIX: message said "Placing" (copy-pasted from place());
            # this is the adjust action.
            "Adjusting objects with segmentation is not allowed.",
            -60.0,
        )
        return -60.0

    try:
        name = action[0]
        initPos = objsPlaced[name]
    except KeyError:
        # Object was never placed, so there is nothing to adjust.
        penalty = 45.0 / len(state.ObjectsPresent)
        appendRewardFeedback(
            state,
            "adjust",
            f"Object '{action[0]}' is not placed in the environment, so it cannot be adjusted.",
            -penalty,
        )
        return -penalty

    # Single lookup replaces the membership test + second .get() call.
    change = ACTION_CONFIG.get(action[1])
    if change is None:
        penalty = 45.0 / len(state.ObjectsPresent)
        appendRewardFeedback(
            state,
            "adjust",
            f"Invalid adjustment direction '{action[1]}'. Valid directions are RIGHT, LEFT, UP, DOWN, FORWARD, BACKWARD, ROTATE.",
            -penalty,
        )
        return -penalty

    return _adjustment_helper(state, name, initPos, change, action[1])