h1manshu committed on
Commit
f913610
·
verified ·
1 Parent(s): bbf592c

Upload folder using huggingface_hub

Browse files
Files changed (5) hide show
  1. README.md +15 -4
  2. client.py +1 -3
  3. inference.py +1 -4
  4. server/app.py +2 -2
  5. server/code_review_environment.py +1 -1
README.md CHANGED
@@ -144,13 +144,24 @@ Install the OpenEnv core package:
144
  ```bash
145
  pip install openenv-core
146
  ```
147
- # Terminal 1 — install packages
 
 
 
 
 
 
 
148
  uv pip install -e .
 
149
 
150
- # Terminal 1 — Run server locally
 
151
  uv run server --host 0.0.0.0 --port 8000
152
-
153
- # Terminal 2 — run the agent
 
 
154
  uv run python inference.py
155
  ```
156
 
 
144
  ```bash
145
  pip install openenv-core
146
  ```
147
+
148
+ Clone the repo
149
+ ```bash
150
+ git clone https://github.com/Ajay-Ganapathy/code_review && cd code_review
151
+ ```
152
+
153
+ Install packages
154
+ ```bash
155
  uv pip install -e .
156
+ ```
157
 
158
+ `[OPTIONAL]` To run the server locally
159
+ ```bash
160
  uv run server --host 0.0.0.0 --port 8000
161
+ ```
162
+
163
+ Run the agent in another terminal
164
+ ```bash
165
  uv run python inference.py
166
  ```
167
 
client.py CHANGED
@@ -113,15 +113,13 @@ class CodeReviewEnv(EnvClient[CodeReviewAction, CodeReviewObservation, State]):
113
  # Handle reward (reset vs step)
114
  reward_data = payload.get("reward")
115
  reward = None
116
-
117
  if reward_data is not None:
118
  try:
119
  reward = float(reward_data)
120
  except Exception:
121
  reward = None
122
-
123
 
124
-
125
  return StepResult(
126
  observation=observation,
127
  reward=reward,
 
113
  # Handle reward (reset vs step)
114
  reward_data = payload.get("reward")
115
  reward = None
116
+
117
  if reward_data is not None:
118
  try:
119
  reward = float(reward_data)
120
  except Exception:
121
  reward = None
 
122
 
 
123
  return StepResult(
124
  observation=observation,
125
  reward=reward,
inference.py CHANGED
@@ -233,10 +233,7 @@ async def run_episode(client, env):
233
  reward = result.reward
234
  done = result.done
235
 
236
-
237
- log_step(
238
- step=step, action=response_text, reward=reward , done=done, error=None
239
- )
240
  final_score = max(final_score, reward if reward else 0.0)
241
 
242
  return final_score
 
233
  reward = result.reward
234
  done = result.done
235
 
236
+ log_step(step=step, action=response_text, reward=reward, done=done, error=None)
 
 
 
237
  final_score = max(final_score, reward if reward else 0.0)
238
 
239
  return final_score
server/app.py CHANGED
@@ -74,11 +74,11 @@ def main():
74
  import argparse
75
 
76
  parser = argparse.ArgumentParser()
77
- host = "0.0.0.0"
78
  parser.add_argument("--port", type=int, default=8000)
79
  args = parser.parse_args()
80
 
81
- uvicorn.run(app, host=host, port=args.port)
82
 
83
 
84
  if __name__ == "__main__":
 
74
  import argparse
75
 
76
  parser = argparse.ArgumentParser()
77
+ parser.add_argument("--host" , type=str , default="0.0.0.0")
78
  parser.add_argument("--port", type=int, default=8000)
79
  args = parser.parse_args()
80
 
81
+ uvicorn.run(app, host=args.host, port=args.port)
82
 
83
 
84
  if __name__ == "__main__":
server/code_review_environment.py CHANGED
@@ -218,7 +218,7 @@ class CodeReviewEnvironment(Environment):
218
  # print("Obs == " , obs)
219
 
220
  rew = CodeReviewReward(score=score, feedback="graded")
221
- print("Score == " , type(rew.score) , " --- " , rew.score)
222
 
223
  # print("FINAL REWARD TYPE:", type(rew))
224
  # print("FINAL REWARD:", rew)
 
218
  # print("Obs == " , obs)
219
 
220
  rew = CodeReviewReward(score=score, feedback="graded")
221
+ print("Score == ", type(rew.score), " --- ", rew.score)
222
 
223
  # print("FINAL REWARD TYPE:", type(rew))
224
  # print("FINAL REWARD:", rew)