Upload folder using huggingface_hub
Browse files- README.md +48 -2
- client.py +5 -0
- grader.py +124 -0
- inference.py +40 -17
- models.py +95 -0
- server/app.py +13 -13
- server/app_environment.py +59 -6
- utils.py +173 -11
README.md
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
emoji: 🔊
|
| 4 |
colorFrom: purple
|
| 5 |
colorTo: yellow
|
|
@@ -10,4 +10,50 @@ app_file: server/app.py
|
|
| 10 |
pinned: false
|
| 11 |
app_port: 8000
|
| 12 |
base_path: /web
|
| 13 |
-
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: The Sorter Project
|
| 3 |
emoji: 🔊
|
| 4 |
colorFrom: purple
|
| 5 |
colorTo: yellow
|
|
|
|
| 10 |
pinned: false
|
| 11 |
app_port: 8000
|
| 12 |
base_path: /web
|
| 13 |
+
---
|
| 14 |
+
|
| 15 |
+
# The Sorter Project
|
| 16 |
+
|
| 17 |
+
## The Purpose
|
| 18 |
+
Building an environment to make AI models learn on how to **identify**, **place** and **adjust** the position of things in the environment which are scattered in a *random* fashion.
|
| 19 |
+
|
| 20 |
+
## Real Life Application
|
| 21 |
+
We came up with this idea, keeping in mind its application in factories, warehouses and storage facilities. _(and even your coffee table!)_
|
| 22 |
+
|
| 23 |
+
## The Problem
|
| 24 |
+
### **The Industrial Perspective / Micro Perspective**
|
| 25 |
+
|
| 26 |
+
Companies spend millions if not billions on establishing, maintaining and organising warehouses and storage facilities, and in a densely populated country like India — with increasing demand for land and surging property prices — efficient storage and organisation becomes the ***need of the hour***, leading to the demand for an environment or an agent that can help companies and organisations and provide them with ways for the maximum efficient and logical storage of their "objects".
|
| 27 |
+
The environments and agents that specialise in full fledged identifying, sorting, stacking and organising of objects or warehouse material are few in number, and ***we are here to fill that gap***.
|
| 28 |
+
|
| 29 |
+
### **The Populational Perspective / Macro Perspective**
|
| 30 |
+
|
| 31 |
+
With increase in population causing decrease of 'Open Spaces' it becomes extremely important to **build societies and localities that can cater to a huge chunk of population** and in such a case, The Sorter Project, though being mainly built for industrial application, becomes an extremely useful tool that allows proper space utilisation to accommodate more people whilst taking minimum space. _(so in the near future we might not have to shift to mars)_
|
| 32 |
+
|
| 33 |
+
## Our Solution
|
| 34 |
+
_We have developed this environment with 'ease' thanks to OpenEnv!_ <br>
|
| 35 |
+
Our Sorter Project consists of ***_3_ different parts*** and ***_4_ different processes/tasks***:
|
| 36 |
+
|
| 37 |
+
### **Part 1**: The Segmentation Problem<br>
|
| 38 |
+
**Task 1:** Our Project has a ****Segmentation Action**** that makes agents identify objects, which is rare to find in multiple similar environments.
|
| 39 |
+
### **Part 2**: The Identification Problem<br>
|
| 40 |
+
**Task 2:** Our Project has an ****Identification Action**** which, though a part of the Segmentation task, is slightly different: it allows agents to segregate objects into **stackable** and **not stackable**, which will be of high importance while addressing the next problem.
|
| 41 |
+
|
| 42 |
+
### **Part 3**: The Placement Problem<br>
|
| 43 |
+
**Task 3:** Our Project has a ****Placement Action**** that allows agents to place things it has found.<br>
|
| 44 |
+
**Task 4:** It also provides an ****Adjust Action**** for the agent to adjust things _(because no one's good in their first try ! )_
|
| 45 |
+
|
| 46 |
+
## Technical Details
|
| 47 |
+
### Reward Logic
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
### Demonstration
|
| 51 |
+
|
| 52 |
+
## Links
|
| 53 |
+
****Huggingface Link**** (to run `inference.py`): https://huggingface.co/spaces/Jibrann/app <br>
|
| 54 |
+
****Github Link**** (this page): https://github.com/jibcamun/Reinforcement-Learning-Object-Placement
|
| 55 |
+
|
| 56 |
+
## Related Works
|
| 57 |
+
[Jumanji](https://github.com/instadeepai/jumanji)<br>
|
| 58 |
+
[miniRL](https://proxyapps.exascaleproject.org/app/minirl/)<br>
|
| 59 |
+
[BabyAI](https://arxiv.org/abs/1810.08272)<br>
|
client.py
CHANGED
|
@@ -16,6 +16,7 @@ class AppEnv(EnvClient[AppAction, AppObservation, AppState]):
|
|
| 16 |
"placement": action.placement,
|
| 17 |
"isSegmentation": action.isSegmentation,
|
| 18 |
"findObjects": action.findObjects,
|
|
|
|
| 19 |
}
|
| 20 |
|
| 21 |
def _parse_result(self, payload: Dict) -> StepResult[AppObservation]:
|
|
@@ -30,6 +31,8 @@ class AppEnv(EnvClient[AppAction, AppObservation, AppState]):
|
|
| 30 |
isDone=obs_data.get("isDone", False),
|
| 31 |
rewardFeedback=obs_data.get("rewardFeedback", []),
|
| 32 |
rewardList=obs_data.get("rewardList", []),
|
|
|
|
|
|
|
| 33 |
)
|
| 34 |
|
| 35 |
return StepResult(
|
|
@@ -52,4 +55,6 @@ class AppEnv(EnvClient[AppAction, AppObservation, AppState]):
|
|
| 52 |
ObjectsPresent=payload.get("ObjectsPresent", {}),
|
| 53 |
rewardFeedback=payload.get("rewardFeedback", []),
|
| 54 |
rewardList=payload.get("rewardList", []),
|
|
|
|
|
|
|
| 55 |
)
|
|
|
|
| 16 |
"placement": action.placement,
|
| 17 |
"isSegmentation": action.isSegmentation,
|
| 18 |
"findObjects": action.findObjects,
|
| 19 |
+
"adjust": action.adjust,
|
| 20 |
}
|
| 21 |
|
| 22 |
def _parse_result(self, payload: Dict) -> StepResult[AppObservation]:
|
|
|
|
| 31 |
isDone=obs_data.get("isDone", False),
|
| 32 |
rewardFeedback=obs_data.get("rewardFeedback", []),
|
| 33 |
rewardList=obs_data.get("rewardList", []),
|
| 34 |
+
numberPlaced=obs_data.get("numberPlaced", 0),
|
| 35 |
+
ObjectsPlaced=obs_data.get("ObjectsPlaced", {}),
|
| 36 |
)
|
| 37 |
|
| 38 |
return StepResult(
|
|
|
|
| 55 |
ObjectsPresent=payload.get("ObjectsPresent", {}),
|
| 56 |
rewardFeedback=payload.get("rewardFeedback", []),
|
| 57 |
rewardList=payload.get("rewardList", []),
|
| 58 |
+
numberPlaced=payload.get("numberPlaced", 0),
|
| 59 |
+
ObjectsPlaced=payload.get("ObjectsPlaced", {}),
|
| 60 |
)
|
grader.py
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from sklearn.preprocessing import MinMaxScaler
import os
from dotenv import load_dotenv
from openai import OpenAI
import json
from json import JSONDecodeError
from numpy import average


load_dotenv()

# LLM endpoint configuration; API_KEY falls back to HF_TOKEN for HF Spaces.
API_URL = os.getenv("API_BASE_URL")
MODEL = os.getenv("MODEL_NAME")
API_KEY = os.getenv("API_KEY") or os.getenv("HF_TOKEN")

# Fixed typos ("places" -> "placed", "receieve" -> "receive") and removed the
# trailing comma from the JSON example: showing the model invalid JSON invites
# invalid JSON back.
SYSTEM_PROMPT_GRADING = """
You are a professional object sorter who works at the industry level and
has a good knowledge about how and where things are to be placed. You shall receive the
list of feedbacks from an accomplice hired; you shall rate the feedback on the scale of 0.0 to 1.0 ONLY.

Rules:
- You shall rate the feedback on the scale of 0.0 to 1.0 ONLY AND also provide a one-line feedback
- You WILL STRICTLY ABIDE BY THIS JSON FORMAT:
{
    "grade": float,
    "feedback": str
}
""".strip()

# Low temperature keeps the grading output deterministic and parseable.
TEMPERATURE = 0.2
| 34 |
+
def _feed_llm(input):
    """Send *input* to the grading LLM and return its raw reply text.

    Raises:
        RuntimeError: when any required environment variable is unset.
    """
    required = {
        "API_BASE_URL": API_URL,
        "MODEL_NAME": MODEL,
        "API_KEY/HF_TOKEN": API_KEY,
    }
    missing = [name for name, value in required.items() if not value]
    if missing:
        raise RuntimeError(
            f"Missing required environment variables: {', '.join(missing)}"
        )

    client = OpenAI(base_url=API_URL, api_key=API_KEY)

    completion = client.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT_GRADING},
            {"role": "user", "content": f"{input}"},
        ],
        temperature=TEMPERATURE,
    )

    return completion.choices[0].message.content or ""
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def _extract_json_payload(output_str: str):
|
| 67 |
+
output_str = output_str.strip()
|
| 68 |
+
|
| 69 |
+
if output_str.startswith("```"):
|
| 70 |
+
lines = output_str.splitlines()
|
| 71 |
+
if len(lines) >= 3:
|
| 72 |
+
output_str = "\n".join(lines[1:-1]).strip()
|
| 73 |
+
|
| 74 |
+
start = output_str.find("{")
|
| 75 |
+
end = output_str.rfind("}")
|
| 76 |
+
|
| 77 |
+
if start == -1 or end == -1 or end < start:
|
| 78 |
+
raise JSONDecodeError("No JSON object found in model output", output_str, 0)
|
| 79 |
+
|
| 80 |
+
return output_str[start : end + 1]
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def parse_output(output_str):
    """Parse the grading LLM's reply into a dict via its embedded JSON payload."""
    return json.loads(_extract_json_payload(output_str))
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def grade_segmentation(appObs):
    """Grade the segmentation step of an episode.

    Combines a numeric grade (mean of min-max-scaled segmentation rewards)
    with an LLM-assigned grade of the reward feedback.

    Returns:
        Tuple of (grade, outputGrade, cumulativeGrade, outputFeedback).
    """
    reward = appObs.rewardListSegment
    feedback = appObs.rewardFeedbackSegment

    # MinMaxScaler requires a 2-D array; fitting the flat reward list directly
    # raises ValueError, so reshape into a single-feature column first.
    if reward:
        scaler = MinMaxScaler()
        column = [[value] for value in reward]
        grade = float(average(scaler.fit_transform(column)))
    else:
        grade = 0.0  # no rewards yet -> neutral numeric grade

    llmOutput = parse_output(_feed_llm(f"Feedback: {feedback}, Reward: {reward}"))
    outputFeedback = llmOutput.get("feedback", "")
    outputGrade = llmOutput.get("grade", 0.0)
    cumulativeGrade = (grade + outputGrade) / 2.0
    return (grade, outputGrade, cumulativeGrade, outputFeedback)
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
def grade_placement(appObs):
    """Grade the placement step of an episode.

    Combines a numeric grade (mean of min-max-scaled placement rewards)
    with an LLM-assigned grade of the reward feedback.

    Returns:
        Tuple of (grade, outputGrade, cumulativeGrade, outputFeedback).
    """
    reward = appObs.rewardListPlace
    feedback = appObs.rewardFeedbackPlace

    # MinMaxScaler requires a 2-D array; fitting the flat reward list directly
    # raises ValueError, so reshape into a single-feature column first.
    if reward:
        scaler = MinMaxScaler()
        column = [[value] for value in reward]
        grade = float(average(scaler.fit_transform(column)))
    else:
        grade = 0.0  # no rewards yet -> neutral numeric grade

    llmOutput = parse_output(_feed_llm(f"Feedback: {feedback}, Reward: {reward}"))
    outputFeedback = llmOutput.get("feedback", "")
    outputGrade = llmOutput.get("grade", 0.0)
    cumulativeGrade = (grade + outputGrade) / 2.0
    return (grade, outputGrade, cumulativeGrade, outputFeedback)
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
def grade_adjust(appObs):
    """Grade the adjustment step of an episode.

    NOTE(review): this was originally a second ``def grade_segmentation`` that
    silently shadowed the real segmentation grader; it reads the Adjust reward
    fields, so it is renamed to the clearly intended ``grade_adjust``.

    Combines a numeric grade (mean of min-max-scaled adjustment rewards)
    with an LLM-assigned grade of the reward feedback.

    Returns:
        Tuple of (grade, outputGrade, cumulativeGrade, outputFeedback).
    """
    reward = appObs.rewardListAdjust
    feedback = appObs.rewardFeedbackAdjust

    # MinMaxScaler requires a 2-D array; fitting the flat reward list directly
    # raises ValueError, so reshape into a single-feature column first.
    if reward:
        scaler = MinMaxScaler()
        column = [[value] for value in reward]
        grade = float(average(scaler.fit_transform(column)))
    else:
        grade = 0.0  # no rewards yet -> neutral numeric grade

    llmOutput = parse_output(_feed_llm(f"Feedback: {feedback}, Reward: {reward}"))
    outputFeedback = llmOutput.get("feedback", "")
    outputGrade = llmOutput.get("grade", 0.0)
    cumulativeGrade = (grade + outputGrade) / 2.0
    return (grade, outputGrade, cumulativeGrade, outputFeedback)
|
inference.py
CHANGED
|
@@ -3,7 +3,7 @@ from dotenv import load_dotenv
|
|
| 3 |
from openai import OpenAI
|
| 4 |
import json
|
| 5 |
from json import JSONDecodeError
|
| 6 |
-
import
|
| 7 |
|
| 8 |
try:
|
| 9 |
from models import AppAction, AppObservation
|
|
@@ -15,8 +15,14 @@ try:
|
|
| 15 |
except ImportError:
|
| 16 |
from app.server.app_environment import AppEnvironment
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
load_dotenv()
|
|
|
|
| 20 |
|
| 21 |
API_URL = os.getenv("API_BASE_URL")
|
| 22 |
MODEL = os.getenv("MODEL_NAME")
|
|
@@ -28,6 +34,7 @@ FALLBACK_ACTION = {
|
|
| 28 |
"isSegmentation": False,
|
| 29 |
"placement": {},
|
| 30 |
"findObjects": {},
|
|
|
|
| 31 |
}
|
| 32 |
|
| 33 |
DEBUG = True
|
|
@@ -38,7 +45,8 @@ SYSTEM_PROMPT = """
|
|
| 38 |
1. **Segment objects** in the environment if `isSegmentation=True`.
|
| 39 |
2. **Identify objects** and their properties (name, stackable) accurately.
|
| 40 |
3. **Place objects** in the 3D grid respecting stacking rules and dimensions.
|
| 41 |
-
4. **
|
|
|
|
| 42 |
|
| 43 |
You must strictly return actions that conform to this Pydantic schema:
|
| 44 |
|
|
@@ -47,37 +55,39 @@ SYSTEM_PROMPT = """
|
|
| 47 |
placement: Dict[str, Tuple[int, int, int, bool]]
|
| 48 |
isSegmentation: bool
|
| 49 |
findObjects: Dict[str, Tuple[int, int, int, bool]]
|
|
|
|
| 50 |
}
|
| 51 |
|
| 52 |
Rules:
|
| 53 |
- Only report objects that are found or placed; empty dicts are valid if none.
|
| 54 |
-
- Do not modify objects that are already placed unless instructed.
|
| 55 |
- Coordinates must be within the grid bounds.
|
| 56 |
- Respect stackable property: non-stackable objects cannot be placed on top of another object.
|
| 57 |
- Use previous step’s reward and rewardFeedback to adjust your strategy.
|
|
|
|
| 58 |
|
| 59 |
Output:
|
| 60 |
- Always return a valid JSON object conforming to the schema.
|
| 61 |
- Do not include any extra text, explanations, or commentary.
|
| 62 |
-
- If no action is possible, return empty dicts for `placement` and `findObjects`.
|
| 63 |
|
| 64 |
Your goal:
|
| 65 |
- Maximize cumulative reward.
|
| 66 |
- Identify all objects correctly.
|
| 67 |
-
- Place objects efficiently while respecting stacking rules.
|
| 68 |
- Learn from reward feedback to improve placement in future steps.
|
| 69 |
|
| 70 |
Always return a valid JSON that conforms exactly to the AppAction Pydantic model:
|
| 71 |
-
{"placement": Dict[str, Tuple[int,int,int,bool]] or {}, "isSegmentation": bool, "findObjects": Dict[str, Tuple[int,int,int,bool]] or {}}
|
| 72 |
|
| 73 |
Actions:
|
| 74 |
-
- To place an object: {"isSegmentation": false, "placement": {"object_name": [x, y, z, stackable]}, "findObjects": {}}
|
| 75 |
-
- To segment objects: {"isSegmentation": true, "placement": {}, "findObjects": {"object_name": [x, y, z, stackable]}}
|
|
|
|
|
|
|
| 76 |
|
| 77 |
Do not include explanations, text, or extra fields.
|
| 78 |
-
If no objects are found or
|
| 79 |
-
The output must be parseable and valid for AppAction(**json_output).
|
| 80 |
-
""".strip()
|
| 81 |
|
| 82 |
MESSAGES = [{"role": "system", "content": SYSTEM_PROMPT}]
|
| 83 |
HISTORY = []
|
|
@@ -125,7 +135,9 @@ def main() -> None:
|
|
| 125 |
)
|
| 126 |
if not value
|
| 127 |
]
|
| 128 |
-
raise RuntimeError(
|
|
|
|
|
|
|
| 129 |
|
| 130 |
env = AppEnvironment()
|
| 131 |
observation: AppObservation = env.reset()
|
|
@@ -148,22 +160,33 @@ def main() -> None:
|
|
| 148 |
|
| 149 |
llm_output = client.chat.completions.create(
|
| 150 |
model=MODEL,
|
| 151 |
-
messages=
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
temperature=TEMPERATURE,
|
| 153 |
)
|
| 154 |
|
| 155 |
message_content = llm_output.choices[0].message.content or ""
|
|
|
|
| 156 |
action: AppAction = parse_output(message_content)
|
| 157 |
-
MESSAGES.append({"role": "assistant", "content": message_content})
|
| 158 |
observation: AppObservation = env.step(action)
|
| 159 |
|
|
|
|
|
|
|
| 160 |
HISTORY.append(observation)
|
|
|
|
| 161 |
|
| 162 |
if observation.isDone:
|
| 163 |
break
|
| 164 |
-
|
| 165 |
-
time.sleep(100)
|
| 166 |
-
|
| 167 |
print(HISTORY)
|
| 168 |
|
| 169 |
|
|
|
|
| 3 |
from openai import OpenAI
|
| 4 |
import json
|
| 5 |
from json import JSONDecodeError
|
| 6 |
+
from numpy import set_printoptions
|
| 7 |
|
| 8 |
try:
|
| 9 |
from models import AppAction, AppObservation
|
|
|
|
| 15 |
except ImportError:
|
| 16 |
from app.server.app_environment import AppEnvironment
|
| 17 |
|
| 18 |
+
try:
|
| 19 |
+
from grader import *
|
| 20 |
+
except ImportError:
|
| 21 |
+
from app.grader import *
|
| 22 |
+
|
| 23 |
|
| 24 |
load_dotenv()
|
| 25 |
+
set_printoptions(precision=2, suppress=True)
|
| 26 |
|
| 27 |
API_URL = os.getenv("API_BASE_URL")
|
| 28 |
MODEL = os.getenv("MODEL_NAME")
|
|
|
|
| 34 |
"isSegmentation": False,
|
| 35 |
"placement": {},
|
| 36 |
"findObjects": {},
|
| 37 |
+
"adjust": ("", "", 0),
|
| 38 |
}
|
| 39 |
|
| 40 |
DEBUG = True
|
|
|
|
| 45 |
1. **Segment objects** in the environment if `isSegmentation=True`.
|
| 46 |
2. **Identify objects** and their properties (name, stackable) accurately.
|
| 47 |
3. **Place objects** in the 3D grid respecting stacking rules and dimensions.
|
| 48 |
+
4. **Adjust object positions** if necessary to optimize placement and maximize rewards.
|
| 49 |
+
5. **Use rewards and feedback** from previous steps to improve future actions.
|
| 50 |
|
| 51 |
You must strictly return actions that conform to this Pydantic schema:
|
| 52 |
|
|
|
|
| 55 |
placement: Dict[str, Tuple[int, int, int, bool]]
|
| 56 |
isSegmentation: bool
|
| 57 |
findObjects: Dict[str, Tuple[int, int, int, bool]]
|
| 58 |
+
adjust : Tuple[str, str, int]
|
| 59 |
}
|
| 60 |
|
| 61 |
Rules:
|
| 62 |
- Only report objects that are found or placed; empty dicts are valid if none.
|
|
|
|
| 63 |
- Coordinates must be within the grid bounds.
|
| 64 |
- Respect stackable property: non-stackable objects cannot be placed on top of another object.
|
| 65 |
- Use previous step’s reward and rewardFeedback to adjust your strategy.
|
| 66 |
+
- Directions for adjustments for an object can be "UP", "DOWN", "LEFT", "RIGHT", "FORWARD", "BACKWARD", "ROTATE" with a positive integer amount.
|
| 67 |
|
| 68 |
Output:
|
| 69 |
- Always return a valid JSON object conforming to the schema.
|
| 70 |
- Do not include any extra text, explanations, or commentary.
|
| 71 |
+
- If no action is possible, return empty dicts for `placement` and `findObjects` and an empty tuple for `adjust`.
|
| 72 |
|
| 73 |
Your goal:
|
| 74 |
- Maximize cumulative reward.
|
| 75 |
- Identify all objects correctly.
|
| 76 |
+
- Place objects efficiently while respecting stacking rules (PS: Do not place the objects in the same location as where it is originally found and use adjust function wherever required.)
|
| 77 |
- Learn from reward feedback to improve placement in future steps.
|
| 78 |
|
| 79 |
Always return a valid JSON that conforms exactly to the AppAction Pydantic model:
|
| 80 |
+
{"placement": Dict[str, Tuple[int,int,int,bool]] or {}, "isSegmentation": bool, "findObjects": Dict[str, Tuple[int,int,int,bool]] or {},"adjust": Tuple[str,str,int] or ("", "", 0)}
|
| 81 |
|
| 82 |
Actions:
|
| 83 |
+
- To place an object: {"isSegmentation": false, "placement": {"object_name": [x, y, z, stackable]}, "findObjects": {}, "adjust":("", "", 0)}
|
| 84 |
+
- To segment objects: {"isSegmentation": true, "placement": {}, "findObjects": {"object_name": [x, y, z, stackable]}, "adjust":("", "", 0)}
|
| 85 |
+
- To adjust objects: {"isSegmentation": false, "placement": {}, "findObjects": {}, "adjust":("object_name", "direction", amount)}
|
| 86 |
+
- To adjust and place objects: {"isSegmentation": false, "placement": {"object_name": [x, y, z, stackable]}, "findObjects": {}, "adjust":("object_name", "direction", amount)}
|
| 87 |
|
| 88 |
Do not include explanations, text, or extra fields.
|
| 89 |
+
If no objects are found, placed or adjusted, return empty dicts for placement and findObjects and empty tuple for adjust.
|
| 90 |
+
The output must be parseable and valid for AppAction(**json_output).""".strip()
|
|
|
|
| 91 |
|
| 92 |
MESSAGES = [{"role": "system", "content": SYSTEM_PROMPT}]
|
| 93 |
HISTORY = []
|
|
|
|
| 135 |
)
|
| 136 |
if not value
|
| 137 |
]
|
| 138 |
+
raise RuntimeError(
|
| 139 |
+
f"Missing required environment variables: {', '.join(missing)}"
|
| 140 |
+
)
|
| 141 |
|
| 142 |
env = AppEnvironment()
|
| 143 |
observation: AppObservation = env.reset()
|
|
|
|
| 160 |
|
| 161 |
llm_output = client.chat.completions.create(
|
| 162 |
model=MODEL,
|
| 163 |
+
messages=[
|
| 164 |
+
MESSAGES[0],
|
| 165 |
+
{
|
| 166 |
+
"role": "user",
|
| 167 |
+
"content": f"""Observation: {observation.model_dump_json()},
|
| 168 |
+
Previous reward: {observation.reward},
|
| 169 |
+
Previous reward list: {observation.rewardList},
|
| 170 |
+
Previous reward feedback: {observation.rewardFeedback},
|
| 171 |
+
Step: {i}""".strip(),
|
| 172 |
+
},
|
| 173 |
+
],
|
| 174 |
temperature=TEMPERATURE,
|
| 175 |
)
|
| 176 |
|
| 177 |
message_content = llm_output.choices[0].message.content or ""
|
| 178 |
+
|
| 179 |
action: AppAction = parse_output(message_content)
|
|
|
|
| 180 |
observation: AppObservation = env.step(action)
|
| 181 |
|
| 182 |
+
MESSAGES.append({"role": "assistant", "content": message_content})
|
| 183 |
+
print(message_content)
|
| 184 |
HISTORY.append(observation)
|
| 185 |
+
print(observation)
|
| 186 |
|
| 187 |
if observation.isDone:
|
| 188 |
break
|
| 189 |
+
|
|
|
|
|
|
|
| 190 |
print(HISTORY)
|
| 191 |
|
| 192 |
|
models.py
CHANGED
|
@@ -9,13 +9,20 @@ class AppAction(Action):
|
|
| 9 |
placement: Dict[str, Tuple[int, int, int, bool]] = Field(
|
| 10 |
default_factory=dict, description="Placement of the object in a 3D grid"
|
| 11 |
)
|
|
|
|
| 12 |
isSegmentation: bool = Field(
|
| 13 |
default=True, description="Whether the model is segmenting the objects"
|
| 14 |
)
|
|
|
|
| 15 |
findObjects: Dict[str, Tuple[int, int, int, bool]] = Field(
|
| 16 |
default_factory=dict, description="Dictionary of objects"
|
| 17 |
)
|
| 18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
class AppObservation(Observation):
|
| 21 |
"""Observation from the App environment"""
|
|
@@ -24,21 +31,26 @@ class AppObservation(Observation):
|
|
| 24 |
default_factory=list,
|
| 25 |
description="Current placement of the objects in a 3D grid",
|
| 26 |
)
|
|
|
|
| 27 |
positions: Dict[str, Tuple[int, int, int, bool]] = Field(
|
| 28 |
default_factory=dict,
|
| 29 |
description="Dictionary of objects with their positions in the environment",
|
| 30 |
)
|
|
|
|
| 31 |
objectsLeft: List[str] = Field(
|
| 32 |
default_factory=list,
|
| 33 |
description="List of unorganised objects left in the environment",
|
| 34 |
)
|
|
|
|
| 35 |
objectsFound: List[str] = Field(
|
| 36 |
default_factory=list,
|
| 37 |
description="List of objects found in the environment",
|
| 38 |
)
|
|
|
|
| 39 |
reward: float = Field(
|
| 40 |
default=0.0, description="Reward received after taking the action"
|
| 41 |
)
|
|
|
|
| 42 |
isDone: bool = Field(default=False, description="Whether the episode has ended")
|
| 43 |
|
| 44 |
rewardFeedback: list[str] = Field(
|
|
@@ -51,6 +63,46 @@ class AppObservation(Observation):
|
|
| 51 |
description="List of reward values received after taking the action",
|
| 52 |
)
|
| 53 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
class AppState(State):
|
| 56 |
"""State for the App environment"""
|
|
@@ -69,13 +121,16 @@ class AppState(State):
|
|
| 69 |
default_factory=list,
|
| 70 |
description="List of unorganised objects left in the environment",
|
| 71 |
)
|
|
|
|
| 72 |
objectsFound: List[str] = Field(
|
| 73 |
default_factory=list,
|
| 74 |
description="List of objects found in the environment",
|
| 75 |
)
|
|
|
|
| 76 |
reward: float = Field(
|
| 77 |
default=0.0, description="Reward received after taking the action"
|
| 78 |
)
|
|
|
|
| 79 |
isDone: bool = Field(default=False, description="Whether the episode has ended")
|
| 80 |
|
| 81 |
ObjectsPresent: Dict[str, Tuple[int, int, int, bool]] = Field(
|
|
@@ -83,6 +138,11 @@ class AppState(State):
|
|
| 83 |
description="Placed objects and their current positions in the environment",
|
| 84 |
)
|
| 85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
rewardFeedback: list[str] = Field(
|
| 87 |
default_factory=list,
|
| 88 |
description="List of feedback strings describing the reward received after taking the action",
|
|
@@ -92,3 +152,38 @@ class AppState(State):
|
|
| 92 |
default_factory=list,
|
| 93 |
description="List of reward values received after taking the action",
|
| 94 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
placement: Dict[str, Tuple[int, int, int, bool]] = Field(
|
| 10 |
default_factory=dict, description="Placement of the object in a 3D grid"
|
| 11 |
)
|
| 12 |
+
|
| 13 |
isSegmentation: bool = Field(
|
| 14 |
default=True, description="Whether the model is segmenting the objects"
|
| 15 |
)
|
| 16 |
+
|
| 17 |
findObjects: Dict[str, Tuple[int, int, int, bool]] = Field(
|
| 18 |
default_factory=dict, description="Dictionary of objects"
|
| 19 |
)
|
| 20 |
|
| 21 |
+
adjust: Tuple[str, str, int] = Field(
|
| 22 |
+
default=("", "", 0),
|
| 23 |
+
description="Adjustment action for moving or rotating objects. Format: (object_name, direction, amount)",
|
| 24 |
+
)
|
| 25 |
+
|
| 26 |
|
| 27 |
class AppObservation(Observation):
|
| 28 |
"""Observation from the App environment"""
|
|
|
|
| 31 |
default_factory=list,
|
| 32 |
description="Current placement of the objects in a 3D grid",
|
| 33 |
)
|
| 34 |
+
|
| 35 |
positions: Dict[str, Tuple[int, int, int, bool]] = Field(
|
| 36 |
default_factory=dict,
|
| 37 |
description="Dictionary of objects with their positions in the environment",
|
| 38 |
)
|
| 39 |
+
|
| 40 |
objectsLeft: List[str] = Field(
|
| 41 |
default_factory=list,
|
| 42 |
description="List of unorganised objects left in the environment",
|
| 43 |
)
|
| 44 |
+
|
| 45 |
objectsFound: List[str] = Field(
|
| 46 |
default_factory=list,
|
| 47 |
description="List of objects found in the environment",
|
| 48 |
)
|
| 49 |
+
|
| 50 |
reward: float = Field(
|
| 51 |
default=0.0, description="Reward received after taking the action"
|
| 52 |
)
|
| 53 |
+
|
| 54 |
isDone: bool = Field(default=False, description="Whether the episode has ended")
|
| 55 |
|
| 56 |
rewardFeedback: list[str] = Field(
|
|
|
|
| 63 |
description="List of reward values received after taking the action",
|
| 64 |
)
|
| 65 |
|
| 66 |
+
numberPlaced: int = Field(
|
| 67 |
+
default=0,
|
| 68 |
+
description="Number of objects successfully placed in the environment",
|
| 69 |
+
)
|
| 70 |
+
|
| 71 |
+
ObjectsPlaced: Dict[str, Tuple[int, int, int, bool]] = Field(
|
| 72 |
+
default_factory=dict,
|
| 73 |
+
description="Objects that have been successfully placed in the environment",
|
| 74 |
+
)
|
| 75 |
+
|
| 76 |
+
rewardListSegment: list[float] = Field(
|
| 77 |
+
default_factory=list,
|
| 78 |
+
description="List of reward values received after taking the action",
|
| 79 |
+
)
|
| 80 |
+
|
| 81 |
+
rewardFeedbackSegment: list[str] = Field(
|
| 82 |
+
default_factory=list,
|
| 83 |
+
description="List of feedback strings describing the reward received after taking the action",
|
| 84 |
+
)
|
| 85 |
+
|
| 86 |
+
rewardListPlace: list[float] = Field(
|
| 87 |
+
default_factory=list,
|
| 88 |
+
description="List of feedback strings describing the reward received after taking the action",
|
| 89 |
+
)
|
| 90 |
+
|
| 91 |
+
rewardFeedbackPlace: list[str] = Field(
|
| 92 |
+
default_factory=list,
|
| 93 |
+
description="List of feedback strings describing the reward received after taking the action",
|
| 94 |
+
)
|
| 95 |
+
|
| 96 |
+
rewardListAdjust: list[float] = Field(
|
| 97 |
+
default_factory=list,
|
| 98 |
+
description="List of feedback strings describing the reward received after taking the action",
|
| 99 |
+
)
|
| 100 |
+
|
| 101 |
+
rewardFeedbackAdjust: list[str] = Field(
|
| 102 |
+
default_factory=list,
|
| 103 |
+
description="List of feedback strings describing the reward received after taking the action",
|
| 104 |
+
)
|
| 105 |
+
|
| 106 |
|
| 107 |
class AppState(State):
|
| 108 |
"""State for the App environment"""
|
|
|
|
| 121 |
default_factory=list,
|
| 122 |
description="List of unorganised objects left in the environment",
|
| 123 |
)
|
| 124 |
+
|
| 125 |
objectsFound: List[str] = Field(
|
| 126 |
default_factory=list,
|
| 127 |
description="List of objects found in the environment",
|
| 128 |
)
|
| 129 |
+
|
| 130 |
reward: float = Field(
|
| 131 |
default=0.0, description="Reward received after taking the action"
|
| 132 |
)
|
| 133 |
+
|
| 134 |
isDone: bool = Field(default=False, description="Whether the episode has ended")
|
| 135 |
|
| 136 |
ObjectsPresent: Dict[str, Tuple[int, int, int, bool]] = Field(
|
|
|
|
| 138 |
description="Placed objects and their current positions in the environment",
|
| 139 |
)
|
| 140 |
|
| 141 |
+
ObjectsPlaced: Dict[str, Tuple[int, int, int, bool]] = Field(
|
| 142 |
+
default_factory=dict,
|
| 143 |
+
description="Objects that have been successfully placed in the environment",
|
| 144 |
+
)
|
| 145 |
+
|
| 146 |
rewardFeedback: list[str] = Field(
|
| 147 |
default_factory=list,
|
| 148 |
description="List of feedback strings describing the reward received after taking the action",
|
|
|
|
| 152 |
default_factory=list,
|
| 153 |
description="List of reward values received after taking the action",
|
| 154 |
)
|
| 155 |
+
|
| 156 |
+
numberPlaced: int = Field(
|
| 157 |
+
default=0,
|
| 158 |
+
description="Number of objects successfully placed in the environment",
|
| 159 |
+
)
|
| 160 |
+
|
| 161 |
+
rewardListSegment: list[float] = Field(
|
| 162 |
+
default_factory=list,
|
| 163 |
+
description="List of reward values received after taking the action",
|
| 164 |
+
)
|
| 165 |
+
|
| 166 |
+
rewardFeedbackSegment: list[str] = Field(
|
| 167 |
+
default_factory=list,
|
| 168 |
+
description="List of feedback strings describing the reward received after taking the action",
|
| 169 |
+
)
|
| 170 |
+
|
| 171 |
+
rewardListPlace: list[float] = Field(
|
| 172 |
+
default_factory=list,
|
| 173 |
+
description="List of feedback strings describing the reward received after taking the action",
|
| 174 |
+
)
|
| 175 |
+
|
| 176 |
+
rewardFeedbackPlace: list[str] = Field(
|
| 177 |
+
default_factory=list,
|
| 178 |
+
description="List of feedback strings describing the reward received after taking the action",
|
| 179 |
+
)
|
| 180 |
+
|
| 181 |
+
rewardListAdjust: list[float] = Field(
|
| 182 |
+
default_factory=list,
|
| 183 |
+
description="List of feedback strings describing the reward received after taking the action",
|
| 184 |
+
)
|
| 185 |
+
|
| 186 |
+
rewardFeedbackAdjust: list[str] = Field(
|
| 187 |
+
default_factory=list,
|
| 188 |
+
description="List of feedback strings describing the reward received after taking the action",
|
| 189 |
+
)
|
server/app.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
try:
|
| 2 |
from openenv.core.env_server.http_server import create_app
|
| 3 |
-
except Exception as e:
|
| 4 |
raise ImportError(
|
| 5 |
"openenv is required for the web interface. Install dependencies with '\n uv sync\n'"
|
| 6 |
) from e
|
|
@@ -18,21 +18,21 @@ app = create_app(
|
|
| 18 |
AppAction,
|
| 19 |
AppObservation,
|
| 20 |
env_name="app",
|
| 21 |
-
max_concurrent_envs=1,
|
| 22 |
)
|
| 23 |
|
| 24 |
|
| 25 |
-
@app.get("/health")
|
| 26 |
-
def health() -> dict[str, str]:
|
| 27 |
-
return {"status": "ok"}
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
@app.get("/")
|
| 31 |
-
def root() -> dict[str, str]:
|
| 32 |
-
return {
|
| 33 |
-
"message": "Object Placer API is running",
|
| 34 |
-
"health": "/health",
|
| 35 |
-
}
|
| 36 |
|
| 37 |
|
| 38 |
def main(host: str = "0.0.0.0", port: int = 8000):
|
|
|
|
| 1 |
try:
|
| 2 |
from openenv.core.env_server.http_server import create_app
|
| 3 |
+
except Exception as e:
|
| 4 |
raise ImportError(
|
| 5 |
"openenv is required for the web interface. Install dependencies with '\n uv sync\n'"
|
| 6 |
) from e
|
|
|
|
| 18 |
AppAction,
|
| 19 |
AppObservation,
|
| 20 |
env_name="app",
|
| 21 |
+
max_concurrent_envs=1,
|
| 22 |
)
|
| 23 |
|
| 24 |
|
| 25 |
+
# @app.get("/health")
|
| 26 |
+
# def health() -> dict[str, str]:
|
| 27 |
+
# return {"status": "ok"}
|
| 28 |
+
#
|
| 29 |
+
#
|
| 30 |
+
# @app.get("/")
|
| 31 |
+
# def root() -> dict[str, str]:
|
| 32 |
+
# return {
|
| 33 |
+
# "message": "Object Placer API is running",
|
| 34 |
+
# "health": "/health",
|
| 35 |
+
# }
|
| 36 |
|
| 37 |
|
| 38 |
def main(host: str = "0.0.0.0", port: int = 8000):
|
server/app_environment.py
CHANGED
|
@@ -22,6 +22,7 @@ class AppEnvironment(Environment):
|
|
| 22 |
self._reset_count = 0
|
| 23 |
|
| 24 |
def _coerce_state(self) -> AppState:
|
|
|
|
| 25 |
if isinstance(self._state, AppState):
|
| 26 |
return self._state
|
| 27 |
|
|
@@ -46,8 +47,16 @@ class AppEnvironment(Environment):
|
|
| 46 |
reward=0.0,
|
| 47 |
isDone=False,
|
| 48 |
ObjectsPresent=placed,
|
|
|
|
| 49 |
rewardFeedback=[],
|
| 50 |
rewardList=[],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
)
|
| 52 |
|
| 53 |
def reset(self) -> AppObservation:
|
|
@@ -62,6 +71,14 @@ class AppEnvironment(Environment):
|
|
| 62 |
isDone=self._state.isDone,
|
| 63 |
rewardFeedback=self._state.rewardFeedback,
|
| 64 |
rewardList=self._state.rewardList,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
)
|
| 66 |
|
| 67 |
def step(self, action: AppAction) -> AppObservation:
|
|
@@ -77,6 +94,7 @@ class AppEnvironment(Environment):
|
|
| 77 |
reward -= 10.0
|
| 78 |
appendRewardFeedback(
|
| 79 |
state,
|
|
|
|
| 80 |
"No action is of invalid schema or format. Penalty applied.",
|
| 81 |
reward,
|
| 82 |
)
|
|
@@ -89,24 +107,51 @@ class AppEnvironment(Environment):
|
|
| 89 |
isDone=state.isDone,
|
| 90 |
rewardFeedback=state.rewardFeedback,
|
| 91 |
rewardList=state.rewardList,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
)
|
| 93 |
|
| 94 |
if action.isSegmentation and action is not None:
|
| 95 |
reward += 10.0
|
| 96 |
-
appendRewardFeedback(state, "Segmentation successful.", reward)
|
| 97 |
|
| 98 |
if action.placement and action is not None:
|
| 99 |
-
|
| 100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
|
| 102 |
if action.findObjects and action is not None:
|
| 103 |
reward += findobject(action.isSegmentation, action.findObjects, state)
|
| 104 |
-
appendRewardFeedback(state, "Object found successfully.", reward)
|
| 105 |
|
| 106 |
-
if
|
|
|
|
|
|
|
|
|
|
| 107 |
state.isDone = True
|
| 108 |
reward += 10.0
|
| 109 |
-
appendRewardFeedback(
|
|
|
|
|
|
|
| 110 |
|
| 111 |
state.reward += reward / (10**state.step_count)
|
| 112 |
|
|
@@ -119,6 +164,14 @@ class AppEnvironment(Environment):
|
|
| 119 |
isDone=state.isDone,
|
| 120 |
rewardFeedback=state.rewardFeedback,
|
| 121 |
rewardList=state.rewardList,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
)
|
| 123 |
|
| 124 |
@property
|
|
|
|
| 22 |
self._reset_count = 0
|
| 23 |
|
| 24 |
def _coerce_state(self) -> AppState:
|
| 25 |
+
|
| 26 |
if isinstance(self._state, AppState):
|
| 27 |
return self._state
|
| 28 |
|
|
|
|
| 47 |
reward=0.0,
|
| 48 |
isDone=False,
|
| 49 |
ObjectsPresent=placed,
|
| 50 |
+
ObjectsPlaced={},
|
| 51 |
rewardFeedback=[],
|
| 52 |
rewardList=[],
|
| 53 |
+
numberPlaced=0,
|
| 54 |
+
rewardListSegment=[],
|
| 55 |
+
rewardFeedbackSegment=[],
|
| 56 |
+
rewardListPlace=[],
|
| 57 |
+
rewardFeedbackPlace=[],
|
| 58 |
+
rewardListAdjust=[],
|
| 59 |
+
rewardFeedbackAdjust=[],
|
| 60 |
)
|
| 61 |
|
| 62 |
def reset(self) -> AppObservation:
|
|
|
|
| 71 |
isDone=self._state.isDone,
|
| 72 |
rewardFeedback=self._state.rewardFeedback,
|
| 73 |
rewardList=self._state.rewardList,
|
| 74 |
+
numberPlaced=self._state.numberPlaced,
|
| 75 |
+
ObjectsPlaced=self._state.ObjectsPlaced,
|
| 76 |
+
rewardListSegment=self._state.rewardListSegment,
|
| 77 |
+
rewardFeedbackSegment=self._state.rewardFeedbackSegment,
|
| 78 |
+
rewardListPlace=self._state.rewardListPlace,
|
| 79 |
+
rewardFeedbackPlace=self._state.rewardFeedbackPlace,
|
| 80 |
+
rewardListAdjust=self._state.rewardListAdjust,
|
| 81 |
+
rewardFeedbackAdjust=self._state.rewardFeedbackAdjust,
|
| 82 |
)
|
| 83 |
|
| 84 |
def step(self, action: AppAction) -> AppObservation:
|
|
|
|
| 94 |
reward -= 10.0
|
| 95 |
appendRewardFeedback(
|
| 96 |
state,
|
| 97 |
+
"",
|
| 98 |
"No action is of invalid schema or format. Penalty applied.",
|
| 99 |
reward,
|
| 100 |
)
|
|
|
|
| 107 |
isDone=state.isDone,
|
| 108 |
rewardFeedback=state.rewardFeedback,
|
| 109 |
rewardList=state.rewardList,
|
| 110 |
+
numberPlaced=state.numberPlaced,
|
| 111 |
+
ObjectsPlaced=state.ObjectsPlaced,
|
| 112 |
+
rewardListSegment=state.rewardListSegment,
|
| 113 |
+
rewardFeedbackSegment=state.rewardFeedbackSegment,
|
| 114 |
+
rewardListPlace=state.rewardListPlace,
|
| 115 |
+
rewardFeedbackPlace=state.rewardFeedbackPlace,
|
| 116 |
+
rewardListAdjust=state.rewardListAdjust,
|
| 117 |
+
rewardFeedbackAdjust=state.rewardFeedbackAdjust,
|
| 118 |
)
|
| 119 |
|
| 120 |
if action.isSegmentation and action is not None:
|
| 121 |
reward += 10.0
|
| 122 |
+
appendRewardFeedback(state, "segment", "Segmentation successful.", reward)
|
| 123 |
|
| 124 |
if action.placement and action is not None:
|
| 125 |
+
placement_reward, placement_failed = place(
|
| 126 |
+
action.isSegmentation, action.placement, state
|
| 127 |
+
)
|
| 128 |
+
reward += placement_reward
|
| 129 |
+
if placement_failed:
|
| 130 |
+
appendRewardFeedback(state, "place", "Failed to place object.", reward)
|
| 131 |
+
else:
|
| 132 |
+
appendRewardFeedback(
|
| 133 |
+
state, "place", "Object placed successfully.", reward
|
| 134 |
+
)
|
| 135 |
+
|
| 136 |
+
if action.adjust and action is not None:
|
| 137 |
+
reward += adjustment(action.isSegmentation, action.adjust, state)
|
| 138 |
+
appendRewardFeedback(
|
| 139 |
+
state, "adjust", "Object adjusted successfully.", reward
|
| 140 |
+
)
|
| 141 |
|
| 142 |
if action.findObjects and action is not None:
|
| 143 |
reward += findobject(action.isSegmentation, action.findObjects, state)
|
| 144 |
+
appendRewardFeedback(state, "segment", "Object found successfully.", reward)
|
| 145 |
|
| 146 |
+
if (
|
| 147 |
+
len(state.objectsLeft) == 0
|
| 148 |
+
and len(state.ObjectsPresent) == state.numberPlaced
|
| 149 |
+
):
|
| 150 |
state.isDone = True
|
| 151 |
reward += 10.0
|
| 152 |
+
appendRewardFeedback(
|
| 153 |
+
state, "segment", "All objects found. Episode completed!", reward
|
| 154 |
+
)
|
| 155 |
|
| 156 |
state.reward += reward / (10**state.step_count)
|
| 157 |
|
|
|
|
| 164 |
isDone=state.isDone,
|
| 165 |
rewardFeedback=state.rewardFeedback,
|
| 166 |
rewardList=state.rewardList,
|
| 167 |
+
numberPlaced=state.numberPlaced,
|
| 168 |
+
ObjectsPlaced=state.ObjectsPlaced,
|
| 169 |
+
rewardListSegment=state.rewardListSegment,
|
| 170 |
+
rewardFeedbackSegment=state.rewardFeedbackSegment,
|
| 171 |
+
rewardListPlace=state.rewardListPlace,
|
| 172 |
+
rewardFeedbackPlace=state.rewardFeedbackPlace,
|
| 173 |
+
rewardListAdjust=state.rewardListAdjust,
|
| 174 |
+
rewardFeedbackAdjust=state.rewardFeedbackAdjust,
|
| 175 |
)
|
| 176 |
|
| 177 |
@property
|
utils.py
CHANGED
|
@@ -47,10 +47,29 @@ OBJECT_NAMES = [
|
|
| 47 |
"pouch",
|
| 48 |
]
|
| 49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
|
| 51 |
-
|
| 52 |
-
|
| 53 |
state.rewardList.append(reward)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
|
| 56 |
def initDimentions(obj):
|
|
@@ -124,7 +143,7 @@ def initGrid():
|
|
| 124 |
|
| 125 |
def initWeightedGrid(shape=None):
|
| 126 |
if shape is None:
|
| 127 |
-
shape = (randint(
|
| 128 |
|
| 129 |
grid = random.uniform(0, 1, shape)
|
| 130 |
|
|
@@ -157,21 +176,27 @@ def _get_weight_value(weight, x, y, z):
|
|
| 157 |
def place(segment, objects, state):
|
| 158 |
dims = state.currentGrid
|
| 159 |
weight = state.weightedGrid
|
|
|
|
|
|
|
| 160 |
reward = 0.0
|
| 161 |
totalObjs = len(objects)
|
| 162 |
reward_per_obj_placed = 45.0 / totalObjs
|
| 163 |
|
| 164 |
-
if segment
|
| 165 |
appendRewardFeedback(
|
| 166 |
-
state, "Placing objects
|
| 167 |
)
|
| 168 |
return -60.0
|
| 169 |
|
| 170 |
for obj_name, pos in objects.items():
|
|
|
|
| 171 |
obj = OBJECTS.get(obj_name)
|
| 172 |
if obj is None:
|
| 173 |
appendRewardFeedback(
|
| 174 |
-
state,
|
|
|
|
|
|
|
|
|
|
| 175 |
)
|
| 176 |
reward -= reward_per_obj_placed
|
| 177 |
continue
|
|
@@ -190,6 +215,7 @@ def place(segment, objects, state):
|
|
| 190 |
reward -= reward_per_obj_placed
|
| 191 |
appendRewardFeedback(
|
| 192 |
state,
|
|
|
|
| 193 |
f"Object '{obj_name}' placement is out of bounds.",
|
| 194 |
-reward_per_obj_placed,
|
| 195 |
)
|
|
@@ -200,6 +226,7 @@ def place(segment, objects, state):
|
|
| 200 |
reward -= reward_per_obj_placed
|
| 201 |
appendRewardFeedback(
|
| 202 |
state,
|
|
|
|
| 203 |
f"Object '{obj_name}' placement overlaps with another object and stacking is not allowed.",
|
| 204 |
-reward_per_obj_placed,
|
| 205 |
)
|
|
@@ -223,6 +250,7 @@ def place(segment, objects, state):
|
|
| 223 |
reward += bonus
|
| 224 |
appendRewardFeedback(
|
| 225 |
state,
|
|
|
|
| 226 |
f"Object '{obj_name}' placed with stacking. Bonus: {bonus:.2f}",
|
| 227 |
bonus,
|
| 228 |
)
|
|
@@ -230,6 +258,7 @@ def place(segment, objects, state):
|
|
| 230 |
reward -= reward_per_obj_placed
|
| 231 |
appendRewardFeedback(
|
| 232 |
state,
|
|
|
|
| 233 |
f"Object '{obj_name}' placement failed. No space for stacking.",
|
| 234 |
-reward_per_obj_placed,
|
| 235 |
)
|
|
@@ -245,6 +274,7 @@ def place(segment, objects, state):
|
|
| 245 |
reward += bonus
|
| 246 |
appendRewardFeedback(
|
| 247 |
state,
|
|
|
|
| 248 |
f"Object '{obj_name}' placed successfully. Bonus: {bonus:.2f}",
|
| 249 |
bonus,
|
| 250 |
)
|
|
@@ -254,16 +284,48 @@ def place(segment, objects, state):
|
|
| 254 |
break
|
| 255 |
|
| 256 |
if not placement_failed:
|
| 257 |
-
state.
|
| 258 |
-
|
| 259 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 260 |
|
| 261 |
|
| 262 |
def findobject(segment, objects, state):
|
| 263 |
|
| 264 |
if not segment or segment is None:
|
| 265 |
appendRewardFeedback(
|
| 266 |
-
state,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 267 |
)
|
| 268 |
return -60.0
|
| 269 |
|
|
@@ -275,7 +337,10 @@ def findobject(segment, objects, state):
|
|
| 275 |
if pos_real is None:
|
| 276 |
reward -= glMetric
|
| 277 |
appendRewardFeedback(
|
| 278 |
-
state,
|
|
|
|
|
|
|
|
|
|
| 279 |
)
|
| 280 |
continue
|
| 281 |
|
|
@@ -283,6 +348,7 @@ def findobject(segment, objects, state):
|
|
| 283 |
reward += glMetric
|
| 284 |
appendRewardFeedback(
|
| 285 |
state,
|
|
|
|
| 286 |
f"Object '{obj_found}' found with correct position and stacking.",
|
| 287 |
glMetric,
|
| 288 |
)
|
|
@@ -292,6 +358,7 @@ def findobject(segment, objects, state):
|
|
| 292 |
reward -= mse
|
| 293 |
appendRewardFeedback(
|
| 294 |
state,
|
|
|
|
| 295 |
f"Object '{obj_found}' found with incorrect position. MSE: {mse:.2f}",
|
| 296 |
-mse,
|
| 297 |
)
|
|
@@ -300,6 +367,7 @@ def findobject(segment, objects, state):
|
|
| 300 |
reward -= glMetric / 4.0
|
| 301 |
appendRewardFeedback(
|
| 302 |
state,
|
|
|
|
| 303 |
f"Object '{obj_found}' found with incorrect stacking. Penalty: {glMetric / 4.0}",
|
| 304 |
-glMetric / 4.0,
|
| 305 |
)
|
|
@@ -307,6 +375,7 @@ def findobject(segment, objects, state):
|
|
| 307 |
reward += glMetric / 4.0
|
| 308 |
appendRewardFeedback(
|
| 309 |
state,
|
|
|
|
| 310 |
f"Object '{obj_found}' found with correct stacking. Bonus: {glMetric / 4.0}",
|
| 311 |
glMetric / 4.0,
|
| 312 |
)
|
|
@@ -316,3 +385,96 @@ def findobject(segment, objects, state):
|
|
| 316 |
state.objectsFound.append(obj)
|
| 317 |
|
| 318 |
return reward
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
"pouch",
|
| 48 |
]
|
| 49 |
|
| 50 |
+
ACTION_CONFIG = {
|
| 51 |
+
"RIGHT": [1, 0, 0],
|
| 52 |
+
"LEFT": [-1, 0, 0],
|
| 53 |
+
"UP": [0, 1, 0],
|
| 54 |
+
"DOWN": [0, -1, 0],
|
| 55 |
+
"FORWARD": [0, 0, 1],
|
| 56 |
+
"BACKWARD": [0, 0, -1],
|
| 57 |
+
"ROTATE": [0, 0, 0],
|
| 58 |
+
}
|
| 59 |
|
| 60 |
+
|
| 61 |
+
def appendRewardFeedback(state, choice, feedback, reward):
|
| 62 |
state.rewardList.append(reward)
|
| 63 |
+
state.rewardFeedback.append(feedback)
|
| 64 |
+
if choice == "segment":
|
| 65 |
+
state.rewardFeedbackSegment.append(feedback)
|
| 66 |
+
state.rewardListSegment.append(reward)
|
| 67 |
+
elif choice == "place":
|
| 68 |
+
state.rewardFeedbackPlace.append(feedback)
|
| 69 |
+
state.rewardListPlace.append(reward)
|
| 70 |
+
elif choice == "adjust":
|
| 71 |
+
state.rewardFeedbackAdjust.append(feedback)
|
| 72 |
+
state.rewardListAdjust.append(reward)
|
| 73 |
|
| 74 |
|
| 75 |
def initDimentions(obj):
|
|
|
|
| 143 |
|
| 144 |
def initWeightedGrid(shape=None):
|
| 145 |
if shape is None:
|
| 146 |
+
shape = (randint(8, 12), randint(8, 12), randint(8, 12))
|
| 147 |
|
| 148 |
grid = random.uniform(0, 1, shape)
|
| 149 |
|
|
|
|
| 176 |
def place(segment, objects, state):
|
| 177 |
dims = state.currentGrid
|
| 178 |
weight = state.weightedGrid
|
| 179 |
+
objsPresent = state.ObjectsPresent
|
| 180 |
+
|
| 181 |
reward = 0.0
|
| 182 |
totalObjs = len(objects)
|
| 183 |
reward_per_obj_placed = 45.0 / totalObjs
|
| 184 |
|
| 185 |
+
if segment:
|
| 186 |
appendRewardFeedback(
|
| 187 |
+
state, "place", "Placing objects with segmentation is not allowed.", -60.0
|
| 188 |
)
|
| 189 |
return -60.0
|
| 190 |
|
| 191 |
for obj_name, pos in objects.items():
|
| 192 |
+
|
| 193 |
obj = OBJECTS.get(obj_name)
|
| 194 |
if obj is None:
|
| 195 |
appendRewardFeedback(
|
| 196 |
+
state,
|
| 197 |
+
"place",
|
| 198 |
+
f"Object '{obj_name}' is not recognized.",
|
| 199 |
+
-reward_per_obj_placed,
|
| 200 |
)
|
| 201 |
reward -= reward_per_obj_placed
|
| 202 |
continue
|
|
|
|
| 215 |
reward -= reward_per_obj_placed
|
| 216 |
appendRewardFeedback(
|
| 217 |
state,
|
| 218 |
+
"place",
|
| 219 |
f"Object '{obj_name}' placement is out of bounds.",
|
| 220 |
-reward_per_obj_placed,
|
| 221 |
)
|
|
|
|
| 226 |
reward -= reward_per_obj_placed
|
| 227 |
appendRewardFeedback(
|
| 228 |
state,
|
| 229 |
+
"place",
|
| 230 |
f"Object '{obj_name}' placement overlaps with another object and stacking is not allowed.",
|
| 231 |
-reward_per_obj_placed,
|
| 232 |
)
|
|
|
|
| 250 |
reward += bonus
|
| 251 |
appendRewardFeedback(
|
| 252 |
state,
|
| 253 |
+
"place",
|
| 254 |
f"Object '{obj_name}' placed with stacking. Bonus: {bonus:.2f}",
|
| 255 |
bonus,
|
| 256 |
)
|
|
|
|
| 258 |
reward -= reward_per_obj_placed
|
| 259 |
appendRewardFeedback(
|
| 260 |
state,
|
| 261 |
+
"place",
|
| 262 |
f"Object '{obj_name}' placement failed. No space for stacking.",
|
| 263 |
-reward_per_obj_placed,
|
| 264 |
)
|
|
|
|
| 274 |
reward += bonus
|
| 275 |
appendRewardFeedback(
|
| 276 |
state,
|
| 277 |
+
"place",
|
| 278 |
f"Object '{obj_name}' placed successfully. Bonus: {bonus:.2f}",
|
| 279 |
bonus,
|
| 280 |
)
|
|
|
|
| 284 |
break
|
| 285 |
|
| 286 |
if not placement_failed:
|
| 287 |
+
state.ObjectsPlaced[obj_name] = pos
|
| 288 |
+
state.numberPlaced += 1
|
| 289 |
+
try:
|
| 290 |
+
if objsPresent[obj_name] == state.ObjectsPlaced[obj_name]:
|
| 291 |
+
reward -= 45.0 / totalObjs
|
| 292 |
+
appendRewardFeedback(
|
| 293 |
+
state,
|
| 294 |
+
"place",
|
| 295 |
+
f"Object '{obj_name}' is being placed in the same location",
|
| 296 |
+
-reward_per_obj_placed,
|
| 297 |
+
)
|
| 298 |
+
except KeyError:
|
| 299 |
+
reward -= reward_per_obj_placed
|
| 300 |
+
appendRewardFeedback(
|
| 301 |
+
state,
|
| 302 |
+
"place",
|
| 303 |
+
f"Object '{obj_name}' is present in the environment, but is placed in same location as originally found.",
|
| 304 |
+
-reward_per_obj_placed,
|
| 305 |
+
)
|
| 306 |
+
|
| 307 |
+
continue
|
| 308 |
+
|
| 309 |
+
return (reward, placement_failed)
|
| 310 |
|
| 311 |
|
| 312 |
def findobject(segment, objects, state):
|
| 313 |
|
| 314 |
if not segment or segment is None:
|
| 315 |
appendRewardFeedback(
|
| 316 |
+
state,
|
| 317 |
+
"segment",
|
| 318 |
+
"Finding objects without segmentation is not allowed.",
|
| 319 |
+
-60.0,
|
| 320 |
+
)
|
| 321 |
+
return -60.0
|
| 322 |
+
|
| 323 |
+
if state.ObjectsPresent == state.objectsFound:
|
| 324 |
+
appendRewardFeedback(
|
| 325 |
+
state,
|
| 326 |
+
"segment",
|
| 327 |
+
"No point in finding more objects as all are already found Make the IsSegement attribute false and execute the place method.",
|
| 328 |
+
-60.0,
|
| 329 |
)
|
| 330 |
return -60.0
|
| 331 |
|
|
|
|
| 337 |
if pos_real is None:
|
| 338 |
reward -= glMetric
|
| 339 |
appendRewardFeedback(
|
| 340 |
+
state,
|
| 341 |
+
"segment",
|
| 342 |
+
f"Object '{obj_found}' not found in the environment.",
|
| 343 |
+
-glMetric,
|
| 344 |
)
|
| 345 |
continue
|
| 346 |
|
|
|
|
| 348 |
reward += glMetric
|
| 349 |
appendRewardFeedback(
|
| 350 |
state,
|
| 351 |
+
"segment",
|
| 352 |
f"Object '{obj_found}' found with correct position and stacking.",
|
| 353 |
glMetric,
|
| 354 |
)
|
|
|
|
| 358 |
reward -= mse
|
| 359 |
appendRewardFeedback(
|
| 360 |
state,
|
| 361 |
+
"segment",
|
| 362 |
f"Object '{obj_found}' found with incorrect position. MSE: {mse:.2f}",
|
| 363 |
-mse,
|
| 364 |
)
|
|
|
|
| 367 |
reward -= glMetric / 4.0
|
| 368 |
appendRewardFeedback(
|
| 369 |
state,
|
| 370 |
+
"segment",
|
| 371 |
f"Object '{obj_found}' found with incorrect stacking. Penalty: {glMetric / 4.0}",
|
| 372 |
-glMetric / 4.0,
|
| 373 |
)
|
|
|
|
| 375 |
reward += glMetric / 4.0
|
| 376 |
appendRewardFeedback(
|
| 377 |
state,
|
| 378 |
+
"segment",
|
| 379 |
f"Object '{obj_found}' found with correct stacking. Bonus: {glMetric / 4.0}",
|
| 380 |
glMetric / 4.0,
|
| 381 |
)
|
|
|
|
| 385 |
state.objectsFound.append(obj)
|
| 386 |
|
| 387 |
return reward
|
| 388 |
+
|
| 389 |
+
|
| 390 |
+
def _remove_object(state, obj_name):
|
| 391 |
+
reward = 0
|
| 392 |
+
try:
|
| 393 |
+
pos = state.ObjectsPlaced.pop(obj_name)
|
| 394 |
+
except KeyError:
|
| 395 |
+
reward -= 45.0 / len(state.ObjectsPresent)
|
| 396 |
+
appendRewardFeedback(
|
| 397 |
+
state,
|
| 398 |
+
"adjust",
|
| 399 |
+
f"Object '{obj_name}' is not placed in the environment.",
|
| 400 |
+
-reward,
|
| 401 |
+
)
|
| 402 |
+
return reward
|
| 403 |
+
|
| 404 |
+
state.numberPlaced -= 1
|
| 405 |
+
dims = state.currentGrid
|
| 406 |
+
obj = OBJECTS.get(obj_name)
|
| 407 |
+
objGrid = initDimentions(obj)
|
| 408 |
+
|
| 409 |
+
for i in range(len(objGrid)):
|
| 410 |
+
for j in range(len(objGrid[0])):
|
| 411 |
+
for k in range(len(objGrid[0][0])):
|
| 412 |
+
if dims[pos[0] + i][pos[1] + j][pos[2] + k] > 0:
|
| 413 |
+
dims[pos[0] + i][pos[1] + j][pos[2] + k] -= 1
|
| 414 |
+
|
| 415 |
+
|
| 416 |
+
def _adjustment_helper(state, name, pos, change, direction):
|
| 417 |
+
_remove_object(state, name)
|
| 418 |
+
|
| 419 |
+
if direction == "ROTATE":
|
| 420 |
+
newPos = (pos[1], pos[0], pos[2], pos[3])
|
| 421 |
+
else:
|
| 422 |
+
newPos = (pos[0] + change[0], pos[1] + change[1], pos[2] + change[2], pos[3])
|
| 423 |
+
|
| 424 |
+
reward, isNotPlaced = place(False, {name: newPos}, state)
|
| 425 |
+
|
| 426 |
+
if isNotPlaced:
|
| 427 |
+
dummyReward = place(False, {name: pos}, state)[0]
|
| 428 |
+
appendRewardFeedback(
|
| 429 |
+
state,
|
| 430 |
+
"adjust",
|
| 431 |
+
f"Failed to adjust object '{name}' in direction {direction}. Reverting to original position.",
|
| 432 |
+
-dummyReward,
|
| 433 |
+
)
|
| 434 |
+
return -dummyReward
|
| 435 |
+
|
| 436 |
+
appendRewardFeedback(
|
| 437 |
+
state,
|
| 438 |
+
"adjust",
|
| 439 |
+
f"Object '{name}' moved {direction} successfully.",
|
| 440 |
+
reward,
|
| 441 |
+
)
|
| 442 |
+
return reward
|
| 443 |
+
|
| 444 |
+
|
| 445 |
+
def adjustment(segment, action, state):
|
| 446 |
+
objsPlaced = state.ObjectsPlaced
|
| 447 |
+
|
| 448 |
+
if segment:
|
| 449 |
+
appendRewardFeedback(
|
| 450 |
+
state, "adjust", "Placing objects with segmentation is not allowed.", -60.0
|
| 451 |
+
)
|
| 452 |
+
return -60.0
|
| 453 |
+
|
| 454 |
+
try:
|
| 455 |
+
initPos = objsPlaced[action[0]]
|
| 456 |
+
name = action[0]
|
| 457 |
+
except KeyError:
|
| 458 |
+
reward_per_obj_placed = 45.0 / len(state.ObjectsPresent)
|
| 459 |
+
appendRewardFeedback(
|
| 460 |
+
state,
|
| 461 |
+
"adjust",
|
| 462 |
+
f"Object '{action[0]}' is not placed in the environment, so it cannot be adjusted.",
|
| 463 |
+
-reward_per_obj_placed,
|
| 464 |
+
)
|
| 465 |
+
return -reward_per_obj_placed
|
| 466 |
+
|
| 467 |
+
if action[1] in ACTION_CONFIG:
|
| 468 |
+
reward = _adjustment_helper(
|
| 469 |
+
state, name, initPos, ACTION_CONFIG.get(action[1]), action[1]
|
| 470 |
+
)
|
| 471 |
+
return reward
|
| 472 |
+
else:
|
| 473 |
+
reward_per_obj_placed = 45.0 / len(state.ObjectsPresent)
|
| 474 |
+
appendRewardFeedback(
|
| 475 |
+
state,
|
| 476 |
+
"adjust",
|
| 477 |
+
f"Invalid adjustment direction '{action[1]}'. Valid directions are RIGHT, LEFT, UP, DOWN, FORWARD, BACKWARD, ROTATE.",
|
| 478 |
+
-reward_per_obj_placed,
|
| 479 |
+
)
|
| 480 |
+
return -reward_per_obj_placed
|