diff --git "a/htmlcov/z_357ee38f49d3e320_action_generator_py.html" "b/htmlcov/z_357ee38f49d3e320_action_generator_py.html" new file mode 100644--- /dev/null +++ "b/htmlcov/z_357ee38f49d3e320_action_generator_py.html" @@ -0,0 +1,629 @@ + + + + + Coverage for tinytroupe/agent/action_generator.py: 0% + + + + + +
+
+

+ Coverage for tinytroupe / agent / action_generator.py: + 0% +

+ +

+ 187 statements   + + + +

+

+ « prev     + ^ index     + » next +       + coverage.py v7.13.4, + created at 2026-02-28 17:48 +0000 +

+ +
+
+
+

1import json 

+

2import statistics # Add this import 

+

3 

+

4import tinytroupe.utils as utils 

+

5from tinytroupe.control import transactional, current_simulation 

+

6import tinytroupe.openai_utils as openai_utils 

+

7from tinytroupe.validation import propositions 

+

8from tinytroupe.utils import JsonSerializableRegistry 

+

9from tinytroupe.experimentation import Proposition 

+

10 

+

11 

+

12class ActionGenerator(JsonSerializableRegistry): 

+

13 

+

14 def __init__(self, max_attempts=2, 

+

15 enable_quality_checks=True, 

+

16 enable_regeneration=True, 

+

17 enable_direct_correction=False, # TODO enable_direct_correction not working very well yet 

+

18 enable_quality_check_for_persona_adherence=True, 

+

19 enable_quality_check_for_selfconsistency=True, 

+

20 enable_quality_check_for_fluency=True, 

+

21 enable_quality_check_for_suitability=False, 

+

22 enable_quality_check_for_similarity=False, 

+

23 continue_on_failure=True, 

+

24 quality_threshold=7, 

+

25 max_action_similarity=0.6, 

+

26 enable_reasoning_step=False): # TODO enable_reasoning_step not working very well yet 

+

27 """ 

+

28 Initializes the ActionGenerator. 

+

29 

+

30 Args: 

+

31 max_attempts (int): The maximum number of attempts to generate an action. 

+

32 enable_quality_checks (bool): Whether to perform quality checks on the generated action. If False, the first action generated 

+

33 is returned without any checks. 

+

34 enable_regeneration (bool): Whether to try to make the agent regenerate the action if the first attempt fails. 

+

35 enable_direct_correction (bool): Whether to directly correct the action if the first attempt fails, without asking the agent to regenerate it. 

+

36 enable_quality_check_for_persona_adherence (bool): Whether to check the action for persona adherence. 

+

37 enable_quality_check_for_selfconsistency (bool): Whether to check the action for self-consistency. 

+

38 enable_quality_check_for_fluency (bool): Whether to check the action for fluency. 

+

39 enable_quality_check_for_suitability (bool): Whether to check the action for suitability. 

+

40 continue_on_failure (bool): Whether to return the last tentative action, even if it fails to pass quality checks. 

+

41 Presumably, the last tentative action is the one that is most likely to be correct, since it has gone through the most iterations of regeneration and correction. 

+

42 quality_threshold (int): The minimum score for each quality check for the action to be considered good quality. 

+

43 enable_reasoning_step (bool): Whether to enable reasoning step in the action generation process. This IS NOT the use of "reasoning models" (e.g., o1, o3), 

+

44 but rather the use of an additional reasoning step in the regular text completion. 

+

45 """ 

+

46 

+

47 self.max_attempts = max_attempts 

+

48 self.regeneration_attempts = 0 

+

49 self.direct_correction_attempts = 0 

+

50 

+

51 self.enable_quality_checks = enable_quality_checks 

+

52 self.enable_regeneration = enable_regeneration 

+

53 self.enable_direct_correction = enable_direct_correction 

+

54 

+

55 self.enable_quality_check_for_persona_adherence = enable_quality_check_for_persona_adherence 

+

56 self.enable_quality_check_for_selfconsistency = enable_quality_check_for_selfconsistency 

+

57 self.enable_quality_check_for_fluency = enable_quality_check_for_fluency 

+

58 self.enable_quality_check_for_suitability = enable_quality_check_for_suitability 

+

59 self.enable_quality_check_for_similarity = enable_quality_check_for_similarity 

+

60 

+

61 self.continue_on_failure = continue_on_failure 

+

62 self.quality_threshold = quality_threshold 

+

63 self.max_action_similarity = max_action_similarity 

+

64 

+

65 self.enable_reasoning_step = enable_reasoning_step 

+

66 

+

67 # This generator has its own copies of the propositions, in order to be able to isolate them 

+

68 # from other agents, particularly when running the simulation in parallel. 

+

69 self.action_persona_adherence = propositions.hard_action_persona_adherence.copy() 

+

70 self.action_self_consistency = propositions.action_self_consistency.copy() 

+

71 self.action_fluency = propositions.action_fluency.copy() 

+

72 self.action_suitability = propositions.action_suitability.copy() 

+

73 

+

74 # initialize statistics  

+

75 self.regeneration_failures = 0 

+

76 self.direct_correction_failures = 0 

+

77 self.regeneration_scores = [] 

+

78 self.direct_correction_scores = [] 

+

79 self.total_actions_produced = 0 

+

80 self.total_original_actions_succeeded = 0 

+

81 

+

82 def generate_next_action(self, agent, current_messages:list): 

+

83 

+

84 from tinytroupe.agent import logger # import here to avoid circular import issues 

+

85 

+

86 # clean up (remove unnecessary elements) and copy the list of current messages to avoid modifying the original ones 

+

87 current_messages = [ 

+

88 {"role": msg["role"], "content": json.dumps(msg["content"])} 

+

89 for msg in current_messages 

+

90 ] 

+

91 

+

92 # starts with no feedback 

+

93 cur_feedback = None 

+

94 all_negative_feedbacks = [] 

+

95 

+

96 best_action = None 

+

97 best_role = None 

+

98 best_content = None 

+

99 best_score = float('-inf') 

+

100 original_score = None 

+

101 

+

102 def update_best(tentative_action, role, content, total_score): 

+

103 nonlocal best_action, best_role, best_content, best_score 

+

104 if total_score > best_score: 

+

105 best_action = tentative_action 

+

106 best_role = role 

+

107 best_content = content 

+

108 best_score = total_score 

+

109 

+

110 def finish_return(tentative_action, role, content, final_score): 

+

111 if original_score is not None and final_score > original_score: 

+

112 logger.warning(f"[{agent.name}] improved total quality from {original_score} to {final_score}") 

+

113 

+

114 # ensure that tentative_action and content are dicts 

+

115 if isinstance(tentative_action, str): 

+

116 tentative_action = json.loads(tentative_action) 

+

117 if isinstance(content, str): 

+

118 content = json.loads(content) 

+

119 

+

120 return tentative_action, role, content, all_negative_feedbacks 

+

121 

+

122 # First attempt to generate an action 

+

123 tentative_action, role, content = self._generate_tentative_action(agent, current_messages, 

+

124 feedback_from_previous_attempt=cur_feedback, 

+

125 previous_tentative_action=None, 

+

126 previous_llm_role=None, previous_llm_content=None) 

+

127 

+

128 if self.enable_quality_checks: 

+

129 # First quality check 

+

130 good_quality, total_score, cur_feedback = self._check_action_quality("Original Action", agent, tentative_action=tentative_action) 

+

131 update_best(tentative_action, role, content, total_score) 

+

132 if original_score is None: 

+

133 original_score = total_score 

+

134 if good_quality: 

+

135 self.total_original_actions_succeeded += 1 

+

136 # Found a good action, let's return it now 

+

137 return finish_return(tentative_action, role, content, total_score) 

+

138 else: 

+

139 logger.warning(f"[{agent.name}] Original action did not pass quality checks: {cur_feedback}") 

+

140 all_negative_feedbacks.append(cur_feedback) 

+

141 

+

142 

+

143 # GENERATE AND REGENERATE the action by the agent  

+

144 # 

+

145 # We first try to make the agent generate (via the current_messages passed) or regenerate the  

+

146 # action based on feedback. 

+

147 if self.enable_regeneration: 

+

148 for attempt in range(self.max_attempts): 

+

149 

+

150 # Generate tentative action 

+

151 tentative_action, role, content = self._generate_tentative_action(agent, current_messages, 

+

152 feedback_from_previous_attempt=cur_feedback, 

+

153 previous_tentative_action=tentative_action, 

+

154 previous_llm_role=role, previous_llm_content=content) 

+

155 logger.debug(f"[{agent.name}] Tentative action: {tentative_action}") 

+

156 self.regeneration_attempts += 1 

+

157 

+

158 good_quality, total_score, cur_feedback = self._check_action_quality(f"Action Regeneration ({attempt})", agent, tentative_action=tentative_action) 

+

159 update_best(tentative_action, role, content, total_score) 

+

160 if good_quality: 

+

161 # Found a good action, let's return it now 

+

162 return finish_return(tentative_action, role, content, total_score) 

+

163 else: 

+

164 self.regeneration_failures += 1 

+

165 self.regeneration_scores.append(total_score) # Assuming feedback contains a score 

+

166 all_negative_feedbacks.append(cur_feedback) 

+

167 

+

168 # CORRECT OR REPHRASE the action directly 

+

169 # 

+

170 # If we got here, it means the agent was not able to directly generate an action 

+

171 # of sufficient quality, so we'll try to rephrase it correctly directly now. 

+

172 if self.enable_direct_correction: 

+

173 for attempt in range(self.max_attempts): 

+

174 tentative_action, role, content = self._correct_action(tentative_action, feedback=cur_feedback, llm_role=role, llm_content=content) 

+

175 logger.warning(f"[{agent.name}] Rephrased the action directly as: {tentative_action}") 

+

176 self.direct_correction_attempts += 1 

+

177 

+

178 good_quality, total_score, cur_feedback = self._check_action_quality(f"Direct Action Correction or Rephrasing ({attempt})", agent, tentative_action=tentative_action) 

+

179 update_best(tentative_action, role, content, total_score) 

+

180 if good_quality: 

+

181 # Found a good action, let's return it now 

+

182 return finish_return(tentative_action, role, content, total_score) 

+

183 else: 

+

184 self.direct_correction_failures += 1 

+

185 self.direct_correction_scores.append(total_score) # Assuming feedback contains a score 

+

186 all_negative_feedbacks.append(cur_feedback) 

+

187 

+

188 # If we got here, all attempts to generate a good action failed 

+

189 if self.continue_on_failure: 

+

190 logger.warning(f"[{agent.name}] All attempts to generate a good action failed. Returning the best one.") 

+

191 return finish_return(best_action, best_role, best_content, best_score) 

+

192 

+

193 else: 

+

194 raise PoorQualityActionException() 

+

195 

+

196 else: 

+

197 # If we got here, it means that the action was generated without quality checks 

+

198 # and we are not doing any regeneration or direct correction, so we can return it now. 

+

199 return tentative_action, role, content, [] 

+

200 

+

201 def _generate_tentative_action(self, agent, current_messages, feedback_from_previous_attempt=None, 

+

202 previous_tentative_action=None, 

+

203 previous_llm_role=None, previous_llm_content=None): 

+

204 

+

205 from tinytroupe.agent import logger, CognitiveActionModel, CognitiveActionModelWithReasoning # import here to avoid circular import issues 

+

206 

+

207 self.total_actions_produced += 1 

+

208 

+

209 # shallow clone current_messages 

+

210 current_messages_context = current_messages.copy() 

+

211 

+

212 logger.debug(f"[{agent.name}] Sending messages to OpenAI API") 

+

213 logger.debug(f"[{agent.name}] Last interaction: {current_messages[-1]}") 

+

214 

+

215 if feedback_from_previous_attempt: 

+

216 #current_messages_copy.append({"role": previous_llm_role, 

+

217 # "content": "TENTATIVE ACTION:" + json.dumps(previous_llm_content)}) 

+

218 

+

219 current_messages_context.append({"role": "user", 

+

220 "content": \ 

+

221 f""" 

+

222 WARNING! TENTATIVE ACTION GENERATION FAILED IN QUALITY CHECKS! 

+

223 

+

224 You were about to produce the following action, as a sequence for the previous actions or feedbacks (if any): 

+

225 ``` 

+

226 {previous_tentative_action} 

+

227 ``` 

+

228  

+

229 However, it failed to pass the quality checks (as described in the quality feedback below), and therefore it was aborted and not added 

+

230 to the simulation trajectory. 

+

231 

+

232 Now you **must** try again to generate a **BETTER** action, such that the quality issues mentioned in the feedback are addressed, 

+

233 or instead issue a DONE action and stop for this turn if it is unclear how to improve quality.  

+

234 Your objective is to **PASS** the quality checks this time if possible. 

+

235 

+

236 You can choose either to FIX somehow the action you were about to produce, or to generate something COMPLETELY NEW and DIFFERENT.  

+

237 Each time your tentative action fail a quality check, you should be MORE RADICAL in your changes, and try to produce  

+

238 something that is **very** different from the previous attempts. 

+

239 

+

240 If it is unclear how to produce a better action, you can choose to issue a DONE action instead.  

+

241 **It is better to stop acting than to act poorly.** 

+

242  

+

243 In general, desireable properties of the action are: 

+

244 - The action is consistent with the agent's persona, it is what one would expect from the agent given its persona. 

+

245 - The action is self-consistent, it does contradict the agent's previous actions. 

+

246 - The action is fluent and natural, and does not repeat itself or use overly formulaic language. 

+

247  

+

248 {feedback_from_previous_attempt} 

+

249 """}) 

+

250 

+

251 current_messages_context.append({"role": "system", 

+

252 "content": "Now generate a better action based on the above feedback, or issue a DONE action if it is unclear how to improve quality."}) 

+

253 

+

254 

+

255 

+

256 # TODO: remind the model of some key rules to follow? 

+

257 # 

+

258 # 

+

259 #current_messages_context.append({"role": "user",  

+

260 # "content": """ 

+

261 # Now you must generate a sequence of actions following the directives in your agent specification,  

+

262 # complying with **all** instructions and contraints related to the action you use. 

+

263 # In particular, to ensure the quality of your actions: 

+

264 # - **DO NOT** generate similar content in a row! We want human-like, natural and fluent behavior, and thus avoid#repeatitive behavior. 

+

265 # - THINK before taking further actions. 

+

266 # - Avoid thinking for too long, and actually take some concrete action before being done, particularly if you are expected to provide some action. 

+

267 # - Intercalate thinking with other actions. 

+

268 # - The new sequence of actions must be coherent and consistent with the previous actions and stimuli. For example, do not assume an expected or 

+

269 # desireable action already happened if that's not registered in the simulation history. 

+

270 # - If you received any quality feedback, you **MUST** take it into account and improve your performance. Your next actions 

+

271 # **must** be better than your previous ones if possible. 

+

272 #  

+

273 # If you can't produce a very good action, you may just issue a DONE action instead and remain silent. Rules to follow in #this case: 

+

274 # - It is better to remain silent than repeating similar actions or making other mistakes. 

+

275 # - Avoid remaining silent for too long (i.e., more than 3 times in a row), as this looks robotic and unnatural. If #necessary, you 

+

276 # can communicate your difficulties in coming up with a proper action, or just say something like "I don't know what to say". 

+

277 # - In case your thoughts or goals insistenly require you to **not** being quiet or silent, then you avoid just issuing #DONE if possible, 

+

278 # and try to produce a new action. In this case, the new action might refer to the difficulties you are having in #coming up with  

+

279 # a proper action in the first place. 

+

280 #  

+

281 # All of these actions **MUST** be rendered following the JSON specification perfectly, including all required keys (even #if their value is empty), **ALWAYS**. 

+

282 # """ 

+

283 # }) 

+

284# 

+

285 

+

286 current_messages_context.append({"role": "system", 

+

287 "content": "Remember: the action you will now generate **MUST** be a **well-formatted** and **valid** JSON object. No extra text, no extra brackets, commas, or other syntax errors."}) 

+

288 

+

289 if not self.enable_reasoning_step: 

+

290 logger.debug(f"[{agent.name}] Reasoning step disabled.") 

+

291 next_message = openai_utils.client().send_message(current_messages_context, response_format=CognitiveActionModel) 

+

292 

+

293 else: 

+

294 logger.debug(f"[{agent.name}] Reasoning step enabled.") 

+

295 

+

296 # If the reasoning step is enabled, we add a system message to the context asking it to think step-by-step 

+

297 # 

+

298 # 

+

299 #current_messages_context.append({"role": "system", 

+

300 # "content": "In your response, you first use the \"reasoning\" field to think step-by-step about what is the next action and cognitive state that you are going to generate. To do so, you carefully consider: the agent specification given initially; additional instructions given later; and the history of stimuli and actions present in the simulation trajectory." + 

+

301 # "Then, you generate the action in the \"action\" field, and generate cognitive state in the \"cognitive_state\" field." }) 

+

302 current_messages_context.append({"role": "system", 

+

303 "content": "Use the \"reasoning\" field to add any reasoning process you might wish to use before generating the next action and cognitive state. "}) 

+

304 

+

305 next_message = openai_utils.client().send_message(current_messages_context, response_format=CognitiveActionModelWithReasoning) 

+

306 

+

307 logger.debug(f"[{agent.name}] Received message: {next_message}") 

+

308 

+

309 role, content = next_message["role"], utils.extract_json(next_message["content"]) 

+

310 

+

311 action = content['action'] 

+

312 logger.debug(f"{agent.name}'s action: {action}") 

+

313 

+

314 return action, role, content 

+

315 

+

316 ############################################################################################### 

+

317 # Quality evaluation methods 

+

318 ############################################################################################### 

+

319 

+

320 def _check_action_quality(self, stage, agent, tentative_action): 

+

321 

+

322 from tinytroupe.agent import logger # import here to avoid circular import issues 

+

323 

+

324 # 

+

325 # Compute various propositions about the action 

+

326 # 

+

327 persona_adherence_passed, persona_adherence_score, persona_adherence_feedback = \ 

+

328 self._check_proposition(agent, self.action_persona_adherence, tentative_action, enable_proposition_check=self.enable_quality_check_for_persona_adherence) 

+

329 

+

330 selfconsistency_passed, selfconsistency_score, selfconsistency_feedback = \ 

+

331 self._check_proposition(agent, self.action_self_consistency, tentative_action, minimum_required_qty_of_actions=1, enable_proposition_check=self.enable_quality_check_for_selfconsistency) 

+

332 

+

333 fluency_passed, fluency_passed_score, fluency_feedback = \ 

+

334 self._check_proposition(agent, self.action_fluency, tentative_action, enable_proposition_check=self.enable_quality_check_for_fluency) 

+

335 

+

336 suitability_passed, suitability_score, suitability_feedback = \ 

+

337 self._check_proposition(agent, self.action_suitability, tentative_action, enable_proposition_check=self.enable_quality_check_for_suitability) 

+

338 

+

339 similarity_passed, similarity_score, similarity_feedback = \ 

+

340 self._check_next_action_similarity(agent, tentative_action, threshold=self.max_action_similarity, enable_similarity_check=self.enable_quality_check_for_similarity) 

+

341 

+

342 # put the results together 

+

343 good_quality = persona_adherence_passed and selfconsistency_passed and fluency_passed and suitability_passed and similarity_passed 

+

344 total_score = persona_adherence_score + selfconsistency_score + fluency_passed_score + suitability_score + (similarity_score * Proposition.MAX_SCORE) 

+

345 

+

346 combined_feedback = utils.combine_texts( 

+

347 persona_adherence_feedback, selfconsistency_feedback, fluency_feedback, suitability_feedback, similarity_feedback 

+

348 ) 

+

349 

+

350 # give verdict 

+

351 if good_quality: 

+

352 return True, total_score, combined_feedback 

+

353 

+

354 else: 

+

355 

+

356 failure_feedback = \ 

+

357 f""" 

+

358 # Quality feedback 

+

359 

+

360 This is the action that was about to be generated by the agent: 

+

361 {tentative_action} 

+

362 

+

363 Unfortunately, the action failed to pass the quality checks, and therefore was aborted and not added to the similation trajectory.  

+

364 The following problems were detected. 

+

365 """ 

+

366 

+

367 if not persona_adherence_passed: 

+

368 failure_feedback += f""" 

+

369 ## Problem: The action does not adhere to the persona specification. 

+

370 {persona_adherence_feedback} 

+

371 

+

372 ### RECOMMENDATIONS FOR IMPROVEMENT 

+

373 Please follow the recommendations below when trying to generate this action again. 

+

374 

+

375 {self.action_persona_adherence.recommendations_for_improvement()} 

+

376 

+

377 """ 

+

378 

+

379 if not selfconsistency_passed: 

+

380 failure_feedback += f""" 

+

381 ## Problem: The action is not self-consistent. 

+

382 {selfconsistency_feedback} 

+

383 

+

384 ### RECOMMENDATIONS FOR IMPROVEMENT 

+

385 Please follow the recommendations below when trying to generate this action again. 

+

386 

+

387 {self.action_self_consistency.recommendations_for_improvement()} 

+

388 

+

389 """ 

+

390 

+

391 if not fluency_passed: 

+

392 failure_feedback += f""" 

+

393 ## Problem: The action is not fluent. 

+

394 {fluency_feedback} 

+

395 

+

396 ### RECOMMENDATIONS FOR IMPROVEMENT 

+

397 Please follow the recommendations below when trying to generate this action again. 

+

398 

+

399 {self.action_fluency.recommendations_for_improvement()} 

+

400  

+

401 """ 

+

402 

+

403 if not suitability_passed: 

+

404 failure_feedback += f""" 

+

405 ## Problem: The action is not suitable to the situation or task. 

+

406 {suitability_feedback} 

+

407 

+

408 ### RECOMMENDATIONS FOR IMPROVEMENT 

+

409 Please follow the recommendations below when trying to generate this action again. 

+

410 

+

411 {self.action_suitability.recommendations_for_improvement()} 

+

412 

+

413 """ 

+

414 

+

415 if not similarity_passed: 

+

416 failure_feedback += f""" 

+

417 ## Problem: The action is too similar to the previous one. 

+

418 {similarity_feedback} 

+

419 

+

420 """ 

+

421 

+

422 logger.warning(f"[{agent.name}][{stage}] failed to pass quality checks: {failure_feedback}") 

+

423 return False, total_score, failure_feedback 

+

424 

+

425 

+

426 def _check_proposition(self, agent, proposition, tentative_action, minimum_required_qty_of_actions=0, enable_proposition_check=True): 

+

427 

+

428 if enable_proposition_check: 

+

429 if agent.actions_count >= minimum_required_qty_of_actions: 

+

430 result = proposition.score(target=agent, claim_variables={"action": tentative_action}, return_full_response=True) 

+

431 

+

432 value_with_justification = f"Score = {result['value']} (out of {Proposition.MAX_SCORE}). Justification = {result['justification']}" 

+

433 

+

434 if result["value"] >= self.quality_threshold: 

+

435 return True, result["value"], value_with_justification 

+

436 else: 

+

437 return False, result["value"], value_with_justification 

+

438 

+

439 else: 

+

440 return True, Proposition.MAX_SCORE, f"The proposition is trivially true due to the lack of enough actions for comparison." 

+

441 else: 

+

442 # If the proposition check is disabled, we assume it passed 

+

443 return True, Proposition.MAX_SCORE, f"The proposition check is disabled, so it is assumed to have passed." 

+

444 

+

445 def _check_next_action_similarity(self, agent, proposed_next_action, threshold, enable_similarity_check=True): 

+

446 """ 

+

447 Checks the similarity between the agent's current action and a proposed next action. 

+

448 High similarity indicates that the proposed action is too similar to the current one, and this 

+

449 check fails. 

+

450 """ 

+

451 from tinytroupe.agent import logger # import here to avoid circular import issues 

+

452 

+

453 if enable_similarity_check: 

+

454 similarity = utils.next_action_jaccard_similarity(agent, proposed_next_action) 

+

455 logger.debug(f"[{agent.name}] Next-action Jaccard similarity: {similarity}") 

+

456 

+

457 if similarity >= threshold: 

+

458 logger.warning(f"[{agent.name}] Next-action Jaccard similarity is above the threshold ({threshold}).") 

+

459 return False, similarity, f"Similarity = {similarity} (range: 0.0 to 1.0). The action is too similar to the previous one." 

+

460 else: 

+

461 logger.debug(f"[{agent.name}] Next-action Jaccard similarity is below the threshold ({threshold}).") 

+

462 return True, similarity, f"Similarity = {similarity} (range: 0.0 to 1.0). The action is sufficiently different from the previous one." 

+

463 

+

464 else: 

+

465 # If the similarity check is disabled, we assume it passed 

+

466 return True, 0.0, f"The similarity check is disabled, so it is assumed to have passed." 

+

467 

+

468 ################################################################################################ 

+

469 # Action correction methods 

+

470 ################################################################################################ 

+

471 

+

472 def _correct_action(self, action:dict, feedback, llm_role, llm_content): 

+

473 situation = \ 

+

474 f""" 

+

475 The following action by an agent was observed: 

+

476  

+

477 {action} 

+

478 

+

479 However, it does not conform to expectations about this agent behavior,  

+

480 due to the following reasons. 

+

481 {feedback} 

+

482 """ 

+

483 #restructured_situation =\ 

+

484 # utils.restructure_as_observed_vs_expected(\ 

+

485 

+

486 # """) 

+

487 #rule = utils.formulate_corrective_rule(restructured_situation) 

+

488 rules = utils.extract_observed_vs_expected_rules(situation) 

+

489 rephrased_action_content = utils.correct_according_to_rule(action["content"], rules) 

+

490 

+

491 # copy action 

+

492 rephrased_action = action.copy() 

+

493 

+

494 # update content 

+

495 rephrased_action["content"] = rephrased_action_content 

+

496 

+

497 # replace in the 'action' key in the original llm content message 

+

498 llm_content["action"] = rephrased_action 

+

499 

+

500 return rephrased_action, llm_role, llm_content 

+

501 

+

502 def get_statistics(self): 

+

503 regeneration_failure_rate = self.regeneration_failures / self.regeneration_attempts if self.regeneration_attempts else 0 

+

504 direct_correction_failure_rate = self.direct_correction_failures / self.direct_correction_attempts if self.direct_correction_attempts else 0 

+

505 

+

506 regeneration_mean_score = statistics.mean(self.regeneration_scores) if self.regeneration_scores else 0 

+

507 regeneration_sd_score = statistics.stdev(self.regeneration_scores) if len(self.regeneration_scores) > 1 else 0 

+

508 

+

509 direct_correction_mean_score = statistics.mean(self.direct_correction_scores) if self.direct_correction_scores else 0 

+

510 direct_correction_sd_score = statistics.stdev(self.direct_correction_scores) if len(self.direct_correction_scores) > 1 else 0 

+

511 

+

512 original_success_rate = self.total_original_actions_succeeded / self.total_actions_produced if self.total_actions_produced else 0 

+

513 

+

514 return { 

+

515 "regeneration_failure_rate": regeneration_failure_rate, 

+

516 "direct_correction_failure_rate": direct_correction_failure_rate, 

+

517 "regeneration_mean_score": regeneration_mean_score, 

+

518 "regeneration_sd_score": regeneration_sd_score, 

+

519 "direct_correction_mean_score": direct_correction_mean_score, 

+

520 "direct_correction_sd_score": direct_correction_sd_score, 

+

521 "total_actions_produced": self.total_actions_produced, 

+

522 "total_original_actions_succeeded": self.total_original_actions_succeeded, 

+

523 "original_success_rate": original_success_rate, 

+

524 "regeneration_success_rate": 1 - regeneration_failure_rate, 

+

525 "direct_correction_success_rate": 1 - direct_correction_failure_rate 

+

526 } 

+

527 

+

528 

+

529class PoorQualityActionException(Exception): 

+

530 def __init__(self, message="The generated action is of poor quality"): 

+

531 self.message = message 

+

532 super().__init__(self.message) 

+
+ + +